/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2008) by the Broad Institute/Massachusetts Institute 
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
*/

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.broad.igv.feature;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import net.sf.samtools.util.AsciiLineReader;
import org.apache.log4j.Logger;
import org.broad.igv.data.ResourceLocator;

/**
 * Static helpers for opening feature/data resources (local files, URLs and
 * server-hosted files described by a {@link ResourceLocator}) and for a few
 * low-level parsing chores (fast string splitting, region-file loading and
 * chromosome-name normalization).
 *
 * @author jrobinso
 */
public class ParsingUtils {

    private static final Logger log = Logger.getLogger(ParsingUtils.class);

    /**
     * Genome ids whose chromosome names only need a "chr" prefix added
     * (no numeric to X/Y conversion).
     */
    static Set<String> legacyGenomes = new HashSet<String>();

    static {
        legacyGenomes.add("S._cerevisiae");
        legacyGenomes.add("Yeast_S._pombe");
        legacyGenomes.add("Chicken_galGal3");
    }

    /**
     * Cache of converted chromosome names: genome id -> (raw name -> converted name).
     * The cache is kept per genome because the same raw name converts differently
     * for different genomes (e.g. "20" is "chrX" for mm* but "chr20" for hg*).
     */
    private static final Map<String, Map<String, String>> chrLookupTable =
            new Hashtable<String, Map<String, String>>();

    /**
     * Open a buffered character reader on the resource described by the locator.
     * A local resource is opened through {@link #openBufferedReader(String)}; a
     * remote resource is fetched from the locator's server URL and its contents
     * are assumed to be gzipped.
     *
     * @param locator describes where the resource lives
     *
     * @return a reader positioned at the start of the (uncompressed) contents
     *
     * @throws FileNotFoundException if a local file does not exist
     * @throws IOException if the resource cannot be opened or is not valid gzip
     *         where gzip is expected
     */
    public static BufferedReader openBufferedReader(ResourceLocator locator)
            throws FileNotFoundException, IOException {

        if (locator.isLocal()) {
            return openBufferedReader(locator.getPath());
        }
        return new BufferedReader(new InputStreamReader(openRemoteStream(locator)));
    }

    /**
     * Open a buffered character reader on a file path or URL.  Strings starting
     * with "http:" or "file:" are opened as URLs and read as-is; anything else is
     * treated as a local file path, and is un-gzipped if the file name ends
     * with "gz".
     *
     * @param pathOrUrl a local file path, or an "http:" / "file:" URL
     *
     * @return a reader positioned at the start of the contents
     *
     * @throws FileNotFoundException if a local file does not exist
     * @throws IOException if the resource cannot be opened
     */
    public static BufferedReader openBufferedReader(String pathOrUrl)
            throws FileNotFoundException, IOException {

        if (pathOrUrl.startsWith("http:") || pathOrUrl.startsWith("file:")) {
            URL url = new URL(pathOrUrl);
            URLConnection connection = url.openConnection();
            return new BufferedReader(new InputStreamReader(connection.getInputStream()));
        }
        return new BufferedReader(new InputStreamReader(openLocalStream(new File(pathOrUrl))));
    }

    /**
     * Open an {@link AsciiLineReader} on the resource described by the locator.
     * The underlying stream is obtained from {@link #openInputStream(ResourceLocator)}.
     *
     * @param locator describes where the resource lives
     *
     * @return a line reader over the (uncompressed) contents
     *
     * @throws FileNotFoundException if a local file does not exist
     * @throws IOException if the resource cannot be opened
     */
    public static AsciiLineReader openAsciiReader(ResourceLocator locator)
            throws FileNotFoundException, IOException {
        return new AsciiLineReader(openInputStream(locator));
    }

    /**
     * Open a byte stream on the resource described by the locator.  A local file
     * is un-gzipped if its name ends with "gz"; a remote resource is always
     * assumed to be gzipped.
     *
     * @param locator describes where the resource lives
     *
     * @return a stream over the (uncompressed) contents
     *
     * @throws FileNotFoundException if a local file does not exist
     * @throws IOException if the resource cannot be opened or is not valid gzip
     *         where gzip is expected
     */
    public static InputStream openInputStream(ResourceLocator locator)
            throws FileNotFoundException, IOException {

        if (locator.isLocal()) {
            return openLocalStream(new File(locator.getPath()));
        }
        return openRemoteStream(locator);
    }

    /**
     * Open a local file, transparently un-gzipping it when the name ends with "gz".
     */
    private static InputStream openLocalStream(File file) throws IOException {
        FileInputStream fileInput = new FileInputStream(file);
        if (file.getName().endsWith("gz")) {
            return gunzip(fileInput);
        }
        return fileInput;
    }

    /**
     * Fetch the locator's file from its server via the "getContents" method.
     * Note -- assumption that the url stream is compressed!
     */
    private static InputStream openRemoteStream(ResourceLocator locator) throws IOException {
        URL url = new URL(
                locator.getServerURL() + "?method=getContents&file=" + locator.getPath());
        URLConnection connection = url.openConnection();
        return gunzip(connection.getInputStream());
    }

    /**
     * Wrap a raw stream in a GZIPInputStream.  The GZIP constructor reads the
     * header eagerly and throws if the data is not gzip; in that case the raw
     * stream is closed here, since the caller never receives a handle to it.
     */
    private static InputStream gunzip(InputStream raw) throws IOException {
        boolean wrapped = false;
        try {
            InputStream unzipped = new GZIPInputStream(raw);
            wrapped = true;
            return unzipped;
        } finally {
            if (!wrapped) {
                closeQuietly(raw);
            }
        }
    }

    /**
     * Close a resource, ignoring a null argument and logging (not propagating)
     * any failure so that it cannot mask an earlier exception.
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            log.error("Error closing resource", e);
        }
    }

    /**
     * Split the string into tokens separated by the given delimiter.  Profiling has
     * revealed that the standard string.split() method typically takes > 1/2
     * the total time when used for parsing ascii files.
     * <p>
     * Empty tokens at the start or in the middle of the string are kept; an empty
     * token after the final delimiter is dropped.  A string containing no
     * delimiter at all is returned as a single token (even if empty).  Parsing
     * stops silently once <code>tokens</code> is full.
     *
     * @param aString  the string to split
     * @param tokens an array to hold the parsed tokens
     * @param delim  character that delimits tokens
     * @return the number of tokens parsed
     */
    public static int split(String aString, String[] tokens, char delim) {

        int maxTokens = tokens.length;
        int nTokens = 0;
        int start = 0;
        int end = aString.indexOf(delim);

        if (end < 0) {
            // No delimiter: the whole string is the only token, if there is room for it.
            if (maxTokens > 0) {
                tokens[nTokens++] = aString;
            }
            return nTokens;
        }

        // end == 0 (string starts with the delimiter) is a valid match and yields an
        // empty first token; only a negative index means "no more delimiters".
        while ((end >= 0) && (nTokens < maxTokens)) {
            tokens[nTokens++] = aString.substring(start, end);
            start = end + 1;
            end = aString.indexOf(delim, start);
        }

        // Add the trailing string,  if there is room and if it is not empty.
        if (nTokens < maxTokens) {
            String trailingString = aString.substring(start);
            if (trailingString.length() > 0) {
                tokens[nTokens++] = trailingString;
            }
        }
        return nTokens;
    }

    /**
     * Load region strings of the form "chr:start-end" from a tab-delimited file.
     * Only lines starting with "chr" are used; the first three tab-separated
     * columns supply the chromosome, start and end.  Lines starting with "chr"
     * that have fewer than three columns are logged and skipped.
     * <p>
     * Reading stops at the end of the file or at the first blank line,
     * whichever comes first.
     *
     * @param file the region file to read
     *
     * @return the list of region strings, or <code>null</code> if the file could
     *         not be read
     */
    public static List<String> loadRegions(File file) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            List<String> features = new ArrayList<String>();
            String nextLine;
            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                if (!nextLine.startsWith("chr")) {
                    continue;
                }
                String[] tokens = nextLine.split("\t");
                if (tokens.length < 3) {
                    log.error("Skipping region line with fewer than 3 columns: " + nextLine);
                    continue;
                }
                features.add(tokens[0] + ":" + tokens[1] + "-" + tokens[2]);
            }
            return features;
        } catch (IOException e) {
            log.error("Error loading regions from file: " + file, e);
            return null;
        } finally {
            // Always release the file handle, including on read errors.
            closeQuietly(reader);
        }
    }

    /**
     * Convert a chromosome name to the "chr"-prefixed convention for the given
     * genome, caching the result per genome.
     * <ul>
     * <li>genome ids starting with "hg": a leading chromosome number 23 / 24
     *     becomes X / Y, and a "chr" prefix is added if missing;</li>
     * <li>genome ids starting with "mm": a leading chromosome number 20 / 21
     *     becomes X / Y, and a "chr" prefix is added if missing;</li>
     * <li>genome ids in {@link #legacyGenomes}: a "chr" prefix is added if missing;</li>
     * <li>any other genome: the name is returned unchanged apart from trimming.</li>
     * </ul>
     *
     * @param genomeId the genome id; <code>null</code> is treated as an unknown genome
     * @param str the raw chromosome name; surrounding whitespace is ignored
     * @return the converted name, or <code>null</code> if <code>str</code> is null
     */
    private static String lookupChr(String genomeId, String str) {
        if (str == null) {
            return null;
        }
        if (genomeId == null) {
            // Hashtable rejects null keys, and no conversion rule applies anyway.
            return str.trim();
        }

        Map<String, String> genomeTable = chrLookupTable.get(genomeId);
        if (genomeTable == null) {
            genomeTable = new Hashtable<String, String>();
            chrLookupTable.put(genomeId, genomeTable);
        }

        String chr = genomeTable.get(str);
        if (chr == null) {
            chr = str.trim();

            if (genomeId.startsWith("hg")) {
                chr = toChrName(chr, "23", "24");
            } else if (genomeId.startsWith("mm")) {
                chr = toChrName(chr, "20", "21");
            } else if (legacyGenomes.contains(genomeId)) {
                chr = chr.startsWith("chr") ? chr : "chr" + chr;
            }
            genomeTable.put(str, chr);
        }
        return chr;
    }

    /**
     * Ensure a "chr" prefix and map a leading numeric X / Y chromosome number to
     * its letter.  Only a number at the very start of the name (after an optional
     * "chr") is converted, so digits embedded elsewhere in a contig name are left
     * alone.
     */
    private static String toChrName(String name, String xNumber, String yNumber) {
        String body = name.startsWith("chr") ? name.substring(3) : name;
        if (hasLeadingNumber(body, xNumber)) {
            body = "X" + body.substring(xNumber.length());
        } else if (hasLeadingNumber(body, yNumber)) {
            body = "Y" + body.substring(yNumber.length());
        }
        return "chr" + body;
    }

    /**
     * True if <code>s</code> starts with <code>number</code> as a complete number,
     * i.e. the next character (if any) is not another digit.
     */
    private static boolean hasLeadingNumber(String s, String number) {
        if (!s.startsWith(number)) {
            return false;
        }
        return (s.length() == number.length()) || !Character.isDigit(s.charAt(number.length()));
    }

    /**
     * Convert a chromosome name from a data file to the naming convention used
     * for the given genome.  Chromosomes X and Y are often represented as numbers
     * in legacy files (23 and 24 for human "hg*" genomes, 20 and 21 for mouse
     * "mm*" genomes), and the "chr" prefix is often omitted.
     *
     * @param genomeId the id of the genome the data belongs to
     * @param chr the chromosome name as it appears in the data
     *
     * @return the converted chromosome name
     */
    public static String convertChrString(String genomeId, String chr) {
        return lookupChr(genomeId, chr);
    }
}
