/*
 * Copyright (c) 2007-2010 by The Broad Institute, Inc. and the Massachusetts Institute of Technology.
 * All Rights Reserved.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which
 * is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 *
 * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF
 * ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT
 * OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE.  IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR
 * RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF
 * ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC
 * DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL
 * BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE
 * FOREGOING.
 */

package org.broad.tribble.util;

import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;
import java.util.zip.GZIPInputStream;

/** @author jrobinso */
public class ParsingUtils {

    /** Logger shared by all static helpers in this class; never reassigned. */
    private static final Logger log = Logger.getLogger(ParsingUtils.class);

    /**
     * Opens the resource at {@code path} via {@link #openInputStream(String)} and wraps it
     * in a {@link BufferedReader}. Bytes are decoded with the platform default charset,
     * because no charset is passed to the {@link InputStreamReader}.
     *
     * @param path a local file path or an http/https/file URL
     * @return a buffered character reader over the resource; the caller must close it
     * @throws IOException if the underlying stream cannot be opened
     */
    public static BufferedReader openBufferedReader(String path)
            throws IOException {
        final InputStreamReader decoder = new InputStreamReader(openInputStream(path));
        return new BufferedReader(decoder);
    }


    /**
     * Opens the resource at {@code path} via {@link #openInputStream(String)} and wraps it
     * in an {@link AsciiLineReader}.
     *
     * @param path a local file path or an http/https/file URL
     * @return a line reader over the resource; the caller must close it
     * @throws IOException if the underlying stream cannot be opened
     */
    public static AsciiLineReader openAsciiReader(String path)
            throws IOException {
        return new AsciiLineReader(openInputStream(path));
    }


    /**
     * Opens a raw byte stream for the given path.
     * <ul>
     * <li>Paths starting with {@code http:}, {@code https:} or {@code file:} are opened as URLs.</li>
     * <li>Paths starting with {@code ftp:} are rejected.</li>
     * <li>Anything else is treated as a local file path.</li>
     * <li>If the path ends with {@code "gz"} the stream is wrapped in a {@link GZIPInputStream}.</li>
     * </ul>
     *
     * @param path a local file path or URL
     * @return an open stream; the caller must close it
     * @throws IOException      if the resource cannot be opened or is not valid gzip data
     * @throws RuntimeException if {@code path} is an ftp URL
     */
    public static InputStream openInputStream(String path)
            throws IOException {

        if (path.startsWith("ftp:")) {
            // TODO -- throw an appropriate exception
            throw new RuntimeException("FTP streams not supported.");
        }

        InputStream inputStream;
        if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("file:")) {
            URL url = new URL(path);
            URLConnection connection = url.openConnection();
            inputStream = connection.getInputStream();
        } else {
            inputStream = new FileInputStream(new File(path));
        }

        if (!path.endsWith("gz")) {
            return inputStream;
        }

        // The GZIPInputStream constructor reads the gzip header and can throw. In that case
        // nobody else holds a reference to the raw stream, so close it here to avoid a leak.
        boolean wrapped = false;
        try {
            InputStream gzipStream = new GZIPInputStream(inputStream);
            wrapped = true;
            return gzipStream;
        } finally {
            if (!wrapped) {
                try {
                    inputStream.close();
                } catch (IOException ignored) {
                    // Best effort: the original failure is the one worth reporting.
                }
            }
        }

    }


    /**
     * Estimates the number of lines in a file by sampling up to its first 100 lines,
     * computing the average bytes per line, and dividing the total length by that average.
     * <p>
     * For http/https URLs the total length is taken from the {@code Content-length} header;
     * otherwise it is the length of the local file.
     *
     * @param filename a local file path or http/https URL
     * @return the estimated line count, {@code 0} if no line could be read, or {@code 1000}
     *         as a fallback if any error occurs while estimating
     */
    public static int estimateLineCount(String filename) {

        AsciiLineReader reader = null;
        try {
            final long fileLength;
            //TODO - ftp
            if (filename.startsWith("http:") || filename.startsWith("https:")) {
                URL url = new URL(filename);
                fileLength = Long.parseLong(HttpUtils.getHeaderField(url, "Content-length"));
            } else {
                fileLength = (new File(filename)).length();
            }

            reader = openAsciiReader(filename);
            int lines = 0;
            // Check the sample limit first and short-circuit, so that no line is consumed
            // (and counted in the reader position) without also being counted in 'lines'.
            while (lines < 100 && reader.readLine() != null) {
                lines++;
            }
            if (lines == 0) {
                // Nothing to sample; avoid dividing by zero.
                return 0;
            }
            double bytesPerLine = ((double) reader.getPosition()) / lines;
            return (int) (fileLength / bytesPerLine);

        } catch (Exception e) {
            log.error("Error estimating line count", e);
            return 1000;
        } finally {
            // The reader is still null if the failure happened before it was opened.
            if (reader != null) {
                reader.close();
            }
        }

    }
    /**
     * Joins all elements of an array of strings with the given separator.
     *
     * @param separator the string to insert between each array element
     * @param strings   the array of strings
     * @return a string, which is the joining of all array values with the separator;
     *         the empty string if the array is empty
     */
    public static String join(String separator, String[] strings) {
        return join(separator, strings, 0, strings.length);
    }

    /**
     * Joins the elements {@code strings[start] .. strings[end - 1]} with the given separator.
     * A {@code null} element is rendered as the text {@code "null"}, regardless of its position.
     *
     * @param separator the string to insert between consecutive elements
     * @param strings   the array of strings
     * @param start     index of the first element to include
     * @param end       index one past the last element to include
     * @return the joined string, or the empty string if the range is empty ({@code end <= start})
     */
    public static String join(String separator, String[] strings, int start, int end) {
        if (end <= start) {
            return "";
        }
        // Append the first element instead of passing it to the StringBuilder constructor:
        // the constructor throws on null, whereas append renders it as "null" like every
        // other element.
        StringBuilder ret = new StringBuilder();
        ret.append(strings[start]);
        for (int i = start + 1; i < end; ++i) {
            ret.append(separator);
            ret.append(strings[i]);
        }
        return ret.toString();
    }


    /**
     * Splits the string into tokens separated by the given delimiter.  Profiling has
     * revealed that the standard string.split() method typically takes > 1/2
     * the total time when used for parsing ascii files.
     * <p>
     * Empty tokens are preserved, including one produced by a leading delimiter.
     * Parsing stops once {@code tokens} is full; any remaining input is discarded.
     *
     * @param aString the string to split
     * @param tokens  an array to hold the parsed tokens
     * @param delim   character that delimits tokens
     * @return the number of tokens parsed (at most {@code tokens.length})
     */
    public static int split(String aString, String[] tokens, char delim) {

        int maxTokens = tokens.length;
        int nTokens = 0;
        int start = 0;
        int end = aString.indexOf(delim);

        // indexOf returns -1 when there is no further delimiter. Index 0 is a valid match
        // (leading delimiter => empty first token), so the test must be >= 0, not > 0.
        while ((end >= 0) && (nTokens < maxTokens)) {
            tokens[nTokens++] = aString.substring(start, end);
            start = end + 1;
            end = aString.indexOf(delim, start);
        }

        // Add the trailing string (the whole input when it contains no delimiter)
        if (nTokens < maxTokens) {
            tokens[nTokens++] = aString.substring(start);
        }
        return nTokens;
    }



    /**
     * Strips every leading and trailing occurrence of {@code ch} from {@code str}
     * (a generalisation of {@link String#trim()} to an arbitrary character).
     *
     * @param str the string to trim
     * @param ch  the character to remove from both ends
     * @return the trimmed string; empty if {@code str} consists only of {@code ch}
     */
    public static String trim(String str, char ch) {
        final int length = str.length();

        // First index that does not hold 'ch'.
        int first = 0;
        while (first < length && str.charAt(first) == ch) {
            first++;
        }

        // Exclusive upper bound: walk back while the last retained character is 'ch'.
        int limit = length;
        while (limit - 1 > first && str.charAt(limit - 1) == ch) {
            limit--;
        }

        return str.substring(first, limit);
    }


    /**
     * Splits the string into tokens separated by tab or space.  This method
     * was added to support wig and bed files, which apparently accept
     * either.
     * <p>
     * Runs of spaces following a delimiter are skipped, but consecutive tabs produce
     * empty tokens. A leading tab or space produces an empty first token.
     * Parsing stops once {@code tokens} is full; any remaining input is discarded.
     *
     * @param aString the string to split
     * @param tokens  an array to hold the parsed tokens
     * @return the number of tokens parsed (at most {@code tokens.length})
     */
    public static int splitWhitespace(String aString, String[] tokens) {

        int maxTokens = tokens.length;
        int nTokens = 0;
        int start = 0;
        int end = indexOfTabOrSpace(aString, start);

        // Index 0 is a valid delimiter position (leading whitespace), so test >= 0, not > 0;
        // otherwise such a line would be returned unsplit as a single token.
        while ((end >= 0) && (nTokens < maxTokens)) {
            tokens[nTokens++] = aString.substring(start, end);

            start = end + 1;
            // Gobble up any whitespace before next token -- don't gobble tabs, consecutive tabs => empty cell
            while (start < aString.length() && aString.charAt(start) == ' ') {
                start++;
            }

            end = indexOfTabOrSpace(aString, start);
        }

        // Add the trailing string
        if (nTokens < maxTokens) {
            tokens[nTokens++] = aString.substring(start);
        }
        return nTokens;
    }

    /** Returns the index of the first tab or space at or after {@code from}, or -1 if there is none. */
    private static int indexOfTabOrSpace(String s, int from) {
        int tab = s.indexOf('\t', from);
        int space = s.indexOf(' ', from);
        if (tab < 0) {
            return space;
        }
        if (space < 0) {
            return tab;
        }
        return Math.min(tab, space);
    }


    /**
     * Reads region definitions from a tab-delimited text file.
     * <p>
     * Each line that starts with {@code "chr"} is split on tabs and its first three
     * columns are formatted as {@code col0:col1-col2}. Lines not starting with
     * {@code "chr"} are ignored, and lines with fewer than three columns are logged and
     * skipped. Reading stops at the first blank line or at end of file.
     *
     * @param file the file to read
     * @return the list of region strings in file order, or {@code null} if the file
     *         could not be read
     */
    public static List<String> loadRegions(File file) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            List<String> features = new ArrayList<String>();
            String nextLine;
            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                if (!nextLine.startsWith("chr")) {
                    continue;
                }
                String[] tokens = nextLine.split("\t");
                if (tokens.length < 3) {
                    // Malformed line: report it and keep going rather than failing the whole load.
                    log.error("Skipping region line with fewer than 3 tab-delimited columns: " + nextLine);
                    continue;
                }
                features.add(tokens[0] + ":" + tokens[1] + "-" + tokens[2]);
            }
            return features;
        } catch (IOException e) {
            log.error("Error loading regions from file: " + file, e);
            // null (rather than an empty list) is the established failure signal for callers.
            return null;
        } finally {
            // Always release the file handle, including when reading fails part-way.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.error("Error closing regions file: " + file, e);
                }
            }
        }
    }


    /*
    * Legacy chromosome-name conversion support, used by convertChrString below:
    * a set of legacy genome identifiers and the lookup tables of chromosome names.
    */
    /**
     * Identifiers of genomes flagged as "legacy". Not referenced within this class;
     * presumably consulted by other classes in this package (verify before changing).
     */
    static Set<String> legacyGenomes = new HashSet<String>();

    static {
        legacyGenomes.add("S._cerevisiae");
        legacyGenomes.add("Yeast_S._pombe");
        legacyGenomes.add("Chicken_galGal3");
    }

    /**
     * Chromosome-name cache for "hg*" and "mm*" genomes, keyed by the name as it appears
     * in the input. Pre-seeded below with bare names ("1", "X", ...) mapped to their
     * "chr"-prefixed form.
     */
    private static final Map<String, String> humanChrLookupTable = new HashMap<String, String>(100);

    /** Chromosome-name cache for all other genomes; maps each name to one shared instance. */
    private static final Map<String, String> chrLookupTable = new HashMap<String, String>(100);

    // Pre seed chr table with "1,2,3,  etc"

    static {
        for (int i = 0; i < 23; i++) {
            humanChrLookupTable.put(String.valueOf(i), "chr" + i);
        }
        humanChrLookupTable.put("X", "chrX");
        humanChrLookupTable.put("Y", "chrY");
        humanChrLookupTable.put("M", "chrM");
        humanChrLookupTable.put("x", "chrX");
        humanChrLookupTable.put("y", "chrY");
        humanChrLookupTable.put("m", "chrM");
    }


    /**
     * Converts a chromosome name to the form cached for the given genome.
     * <p>
     * For genome ids starting with "hg" or "mm", pre-seeded bare names such as "1" or "X"
     * map to "chr1" / "chrX". Any other name has the legacy numeric sex-chromosome codes
     * rewritten (hg: 23 -> X, 24 -> Y; mm: 20 -> X, 21 -> Y) and is cached under the
     * original name. For every other genome, and when {@code genomeId} is {@code null},
     * the name is returned unchanged, via a cache so that equal names share one String.
     * <p>
     * NOTE(review): because the seeded table already contains "20" and "21", the mm
     * rewrite never applies to those bare names (they resolve to "chr20"/"chr21") --
     * confirm whether that is intended.
     * NOTE(review): the caches are plain HashMaps mutated without synchronization --
     * confirm callers are single-threaded.
     *
     * @param genomeId the genome identifier, may be {@code null}
     * @param str      the chromosome name to convert, may be {@code null}
     * @return the converted name, or {@code null} if {@code str} is {@code null}
     */
    public static String convertChrString(String genomeId, String str) {

        if (str == null) {
            return null;
        }

        // Evaluate the null check once for both prefixes. The unparenthesised form
        // "a != null && hg || mm" dereferenced a null genomeId in the "mm" test.
        final boolean human = genomeId != null && genomeId.startsWith("hg");
        final boolean mouse = genomeId != null && genomeId.startsWith("mm");

        // Special legacy rules for UCSC human & mouse
        if (human || mouse) {
            String chr = humanChrLookupTable.get(str);
            if (chr == null) {
                if (human) {
                    chr = str.replace("23", "X").replace("24", "Y");
                } else {
                    chr = str.replace("20", "X").replace("21", "Y");
                }
                // Cache under the ORIGINAL name. Caching under the rewritten name would
                // overwrite seeded entries (e.g. "X" -> "chrX") and make results depend
                // on call order.
                humanChrLookupTable.put(str, chr);
            }
            return chr;
        }

        // All other genomes.  Lookup table is used to prevent keeping zillions of identical strings
        String chr = chrLookupTable.get(str);
        if (chr == null) {
            chr = str;
            chrLookupTable.put(str, chr);
        }
        return chr;

    }


}