/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2008) by the Broad Institute/Massachusetts Institute
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
 */
package org.broad.igv.data;

//~--- non-JDK imports --------------------------------------------------------
import org.broad.igv.util.AsciiLineReader;
import org.apache.log4j.Logger;


//~--- JDK imports ------------------------------------------------------------

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import java.io.InputStream;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.broad.igv.feature.ParsingUtils;
import org.broad.igv.track.TrackType;

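/**
 * Parser for tab-delimited IGV dataset files.  Files whose name ends in ".igv"
 * (optionally followed by ".txt") use the chr/start/end layout; every other file
 * is assumed to be a SNP-style file, with ".cn", ".xcn" and ".snp" treated as
 * copy-number data.
 *
 * {@link #scan(IGVDataset)} makes one pass over the file to record where each
 * chromosome begins; {@link #loadChromosomeData(ChromosomeSummary, String[])}
 * later re-opens the file at such an offset to load a single chromosome.
 *
 * @version    08/11/11
 */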
public class IGVDatasetParser {

    /** Logger used by this parser. */
    private static Logger log = Logger.getLogger(IGVDatasetParser.class);
    /** Locator of the data file being parsed; its path also selects the column layout. */
    private ResourceLocator dataResourceLocator;
    /** Zero-based index of the chromosome column in a data row. */
    private int chrColumn;
    /** Zero-based index of the start-location column in a data row. */
    private int startColumn;
    /** Zero-based index of the end-location column, or -1 when the layout has none. */
    private int endColumn;
    /** Zero-based index of the first column holding sample data. */
    private int firstDataColumn;
    /** True when rows carry an end location (set for the ".igv" layout only). */
    private boolean hasEndLocations;
    /** True for ".xcn"/".snp" files; data columns are then read with a stride of 2 instead of 1. */
    private boolean hasCalls;
    /** Genome identifier passed to ParsingUtils.convertChrString along with each row's chromosome token. */
    private String genomeId;

    /**
     * Creates a parser bound to a single data file.  The column layout is
     * derived from the file's path as part of construction.
     *
     * @param copyNoFile locator of the data file to parse
     * @param genomeId   genome identifier used when converting chromosome names
     */
    public IGVDatasetParser(ResourceLocator copyNoFile, String genomeId) {
        this.genomeId = genomeId;
        this.dataResourceLocator = copyNoFile;

        // Must follow the locator assignment: the layout is chosen from its path.
        initParameters();
    }

    /**
     * Chooses the column layout from the data file's path.  A trailing ".txt"
     * is ignored when looking at the extension.
     */
    private void initParameters() {
        final String path = dataResourceLocator.getPath();
        final String baseName = path.endsWith(".txt")
            ? path.substring(0, path.length() - 4)
            : path;
        final boolean igvLayout = baseName.endsWith(".igv");

        // ".igv" layout:      chr, start, end, (column 3 not read), data...
        // any other file is assumed to be a snp file:
        //                     (column 0 not read), chr, start, data...
        chrColumn = igvLayout ? 0 : 1;
        startColumn = igvLayout ? 1 : 2;
        endColumn = igvLayout ? 2 : -1;
        firstDataColumn = igvLayout ? 4 : 3;
        hasEndLocations = igvLayout;

        // Only the snp-style ".xcn" and ".snp" files are flagged as having calls.
        hasCalls = !igvLayout && (baseName.endsWith(".xcn") || baseName.endsWith(".snp"));
    }

    /**
     * Scans the data file once, from top to bottom, for chromosome breaks.
     * <p>
     * Leading lines that start with "#" or are blank are treated as comments and
     * passed to parseComment; the first other line is taken as the column headings.
     * For every following row the chromosome and start location are read, and one
     * value per heading is collected.  Each time the chromosome changes, the data
     * gathered for the previous chromosome is pushed into the dataset's genome
     * summary and a new ChromosomeSummary is started at the current file offset.
     * <p>
     * File offsets are accumulated from the lengths of the lines returned by
     * {@code readLine(true)}.  NOTE(review): this assumes those lines include
     * their terminator and that the file is single-byte encoded -- confirm against
     * AsciiLineReader.
     * <p>
     * Rows with too few columns or a non-numeric start location are logged and
     * skipped.  A row with fewer data columns than headings is padded with NaN, and
     * columns beyond the last heading are logged and ignored, so every heading
     * receives exactly one value per accepted row.
     *
     * @param dataset dataset that receives the track type, windowing function,
     *                headings, genome summary data, data range and log-normalized flag
     * @return one summary per run of consecutive rows sharing a chromosome, in file order
     * @throws RuntimeException if the file cannot be opened or read, or if it ends
     *                          before a column-heading line is found
     */
    public List<ChromosomeSummary> scan(IGVDataset dataset) {

        float dataMin = 0;
        float dataMax = 0;
        long filePosition = 0;

        try
        {
            List<ChromosomeSummary> chrSummaries = new ArrayList<ChromosomeSummary>();
            int skipColumns = hasCalls ? 2 : 1;

            InputStream is = ParsingUtils.openInputStream(dataResourceLocator);
            AsciiLineReader reader = new AsciiLineReader(is);
            try
            {
                // Infer datatype from extension.  This can be overridden in the
                // comment section
                if (isCopyNumberFileExt(dataResourceLocator.getPath()))
                {
                    dataset.setTrackType(TrackType.COPY_NUMBER);
                    dataset.getTrackProperties().setWindowingFunction(WindowFunction.median);
                } else
                {
                    dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
                }

                // Parse comments, if any.  The null check guards against files that
                // are empty or contain nothing but comments.
                String nextLine = reader.readLine(true);
                while (nextLine != null
                    && (nextLine.startsWith("#") || nextLine.trim().length() == 0))
                {
                    filePosition += nextLine.length();
                    if (nextLine.length() > 0)
                    {
                        parseComment(nextLine, dataset);
                    }
                    nextLine = reader.readLine(true);
                }
                if (nextLine == null)
                {
                    throw new RuntimeException("No column headings found in: "
                        + dataResourceLocator.getPath());
                }
                filePosition += nextLine.length();

                // Parse column headings
                String[] data = nextLine.trim().split("\t");
                String[] headings = getHeadings(data, skipColumns);
                dataset.setDataHeadings(headings);

                // Token count of a row carrying exactly one entry per heading.
                int expectedTokens = headings.length * skipColumns + firstDataColumn;
                // Fewest tokens a row needs for its chromosome and start columns to exist.
                int minTokens = Math.max(chrColumn, startColumn) + 1;

                // Infer if the data is logNormalized by looking for negative data values.
                // Assume it is not until proven otherwise
                boolean logNormalized = false;

                ChromosomeSummary chrSummary = null;
                WholeGenomeData wgData = new WholeGenomeData(headings);
                int nRows = 0;

                while ((nextLine = reader.readLine(true)) != null)
                {
                    // Remember where this row starts and advance the running offset
                    // immediately, so that rows skipped below cannot shift the start
                    // positions recorded for later chromosomes.
                    long rowStart = filePosition;
                    filePosition += nextLine.length();

                    int nTokens = ParsingUtils.split(nextLine.trim(), tokens, '\t');
                    if (nTokens == 0)
                    {
                        continue;
                    }
                    if (nTokens < minTokens)
                    {
                        // The token buffer is reused between rows; reading past nTokens
                        // would pick up whatever an earlier row left there.
                        log.info("Too few columns.  Skipping row: " + nextLine);
                        continue;
                    }

                    String thisChr = ParsingUtils.convertChrString(genomeId, tokens[chrColumn].trim());
                    if (chrSummary == null || !thisChr.equals(chrSummary.getName()))
                    {
                        // Update whole genome and previous chromosome summary, unless this is
                        // the first chromosome
                        if (chrSummary != null)
                        {
                            updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
                            chrSummary.setNDataPoints(nRows);
                        }

                        // Start the next chromosome
                        chrSummary = new ChromosomeSummary(thisChr, rowStart);
                        chrSummaries.add(chrSummary);
                        nRows = 0;
                        wgData = new WholeGenomeData(headings);
                    }

                    int location;
                    try
                    {
                        location = Integer.parseInt(tokens[startColumn]);
                    } catch (NumberFormatException numberFormatException)
                    {
                        log.info("Location column is not a number.  Skipping row: " + nextLine);
                        continue;
                    }

                    wgData.locations.add(location);

                    if (nTokens > expectedTokens)
                    {
                        // Extra trailing columns have no heading; they are reported and ignored.
                        log.info("Unexpected number of tokens.  Expected " + expectedTokens
                            + " found: " + nTokens + "   (" + nextLine + ")");
                    }

                    // Exactly one value per heading keeps every data list the same
                    // length as the locations list.
                    for (int idx = 0; idx < headings.length; idx++)
                    {
                        int i = firstDataColumn + idx * skipColumns;
                        float copyNo = Float.NaN;
                        if (i < nTokens)
                        {
                            try
                            {
                                copyNo = Float.parseFloat(tokens[i]);
                            } catch (NumberFormatException e)
                            {
                                // Non-numeric value: recorded as NaN.
                            }
                        }

                        // NaN must not reach Math.min/max: it would stick as the result.
                        if (!Float.isNaN(copyNo))
                        {
                            dataMin = Math.min(dataMin, copyNo);
                            dataMax = Math.max(dataMax, copyNo);
                            if (copyNo < 0)
                            {
                                logNormalized = true;
                            }
                        }
                        wgData.data.get(headings[idx]).add(copyNo);
                    }
                    nRows++;
                }

                // Update last chromosome
                if (chrSummary != null)
                {
                    updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
                    chrSummary.setNDataPoints(nRows);
                }

                dataset.setLogNormalized(logNormalized);
                dataset.setDataMin(dataMin);
                dataset.setDataMax(dataMax);

                return chrSummaries;

            } finally
            {
                // Release the reader on every exit path, including failures.
                reader.close();
            }

        } catch (FileNotFoundException e)
        {
            log.error("CN file not found: " + dataResourceLocator);
            throw new RuntimeException(e);
        } catch (IOException e)
        {
            log.error(dataResourceLocator.getPath(), e);
            throw new RuntimeException(e);
        }

    }

    /**
     * Loads the data for a single chromosome.
     * <p>
     * The file is re-opened and positioned at {@code chrSummary.getStartPosition()}.
     * Rows are then read until the end of the file, the first blank line, or the
     * first row belonging to a different chromosome after at least one matching
     * row has been seen.  Rows with too few columns or with a non-numeric start or
     * end location are logged and skipped.  Data values that are missing or not
     * numeric are stored as NaN, and columns beyond the last header are ignored, so
     * every header receives exactly one value per accepted row.
     *
     * @param chrSummary    summary naming the chromosome and giving its file offset
     *                      and estimated row count
     * @param columnHeaders sample headings, one per data column to load
     * @return the start locations, end locations (when the layout has them) and
     *         per-heading values for the chromosome
     * @throws RuntimeException if the file cannot be opened or read
     */
    public ChromosomeData loadChromosomeData(
        ChromosomeSummary chrSummary, String[] columnHeaders) {

        try
        {
            int skipColumns = hasCalls ? 2 : 1;

            // Get an estimate of the number of snps (rows).  THIS IS ONLY AN ESTIMATE
            int nRowsEst = chrSummary.getNDataPts();

            InputStream is = ParsingUtils.openInputStream(dataResourceLocator);
            try
            {
                position(is, chrSummary.getStartPosition());
                AsciiLineReader reader = new AsciiLineReader(is);

                // Create containers to hold data
                IntArrayList startLocations = new IntArrayList(nRowsEst);
                IntArrayList endLocations = (hasEndLocations ? new IntArrayList(nRowsEst) : null);

                Map<String, FloatArrayList> dataMap = new HashMap<String, FloatArrayList>();
                for (String h : columnHeaders)
                {
                    dataMap.put(h, new FloatArrayList(nRowsEst));
                }

                // Fewest tokens a row needs for its chromosome and location columns to
                // exist (endColumn is -1 when the layout has no end locations).
                int minTokens = Math.max(chrColumn, Math.max(startColumn, endColumn)) + 1;

                String chromosome = chrSummary.getName();
                boolean chromosomeStarted = false;

                // Begin loop through rows
                String nextLine = reader.readLine();
                while ((nextLine != null) && (nextLine.trim().length() > 0))
                {
                    int nTokens = ParsingUtils.split(nextLine, tokens, '\t');

                    if (nTokens < minTokens)
                    {
                        // The token buffer is reused between rows; reading past nTokens
                        // would pick up whatever an earlier row left there.
                        log.info("Skipping line (too few columns) " + nextLine);
                    } else
                    {
                        String thisChromosome = ParsingUtils.convertChrString(genomeId,
                            tokens[chrColumn].trim());

                        if (thisChromosome.equals(chromosome))
                        {
                            chromosomeStarted = true;
                            try
                            {
                                // Parse both locations before storing either, so a bad
                                // row leaves all containers untouched.
                                int start = Integer.parseInt(tokens[startColumn].trim());
                                if (hasEndLocations)
                                {
                                    endLocations.add(Integer.parseInt(tokens[endColumn].trim()));
                                }
                                startLocations.add(start);

                                // Exactly one value per header keeps every data array
                                // the same length as the location arrays.
                                for (int idx = 0; idx < columnHeaders.length; idx++)
                                {
                                    int i = firstDataColumn + idx * skipColumns;
                                    float copyNo = Float.NaN;
                                    if (i < nTokens)
                                    {
                                        try
                                        {
                                            copyNo = Float.parseFloat(tokens[i].trim());
                                        } catch (NumberFormatException e)
                                        {
                                            // Non-numeric value: recorded as NaN.
                                        }
                                    }
                                    dataMap.get(columnHeaders[idx]).add(copyNo);
                                }
                            } catch (NumberFormatException numberFormatException)
                            {
                                // Skip line
                                log.info("Skipping line (NumberFormatException) " + nextLine);
                            }

                        } else if (chromosomeStarted)
                        {
                            // First row of the next chromosome: this one is complete.
                            break;
                        }
                    }

                    nextLine = reader.readLine();
                }

                // Loop complete
                ChromosomeData cd = new ChromosomeData(chrSummary.getName());
                cd.setStartLocations(startLocations.toArray());
                if (hasEndLocations)
                {
                    cd.setEndLocations(endLocations.toArray());
                }

                for (String h : columnHeaders)
                {
                    cd.setData(h, dataMap.get(h).toArray());
                }

                return cd;

            } finally
            {
                // Release the stream on every exit path, including failures.
                is.close();
            }

        } catch (IOException ex)
        {
            log.error("Error parsing cn file", ex);
            throw new RuntimeException("Error parsing cn file", ex);
        }

    }

    /**
     * Applies one comment line to the dataset.  After the first character is
     * dropped, a line starting with "track" is handed to
     * DatasetParserUtils.parseTrackLine; otherwise a line of the form
     * {@code key=value} sets the dataset name (key "name") or track type
     * (key "type").  Anything else, including an unknown track type, is ignored.
     * <p>
     * Note:  This is an exact copy of the method in GCTDatasetParser.  Refactor to merge these
     * two parsers, or share a common base class.
     *
     * @param comment the comment line, including its leading character
     * @param dataset the dataset to update
     */
    private void parseComment(String comment, IGVDataset dataset) {

        final String body = comment.substring(1);

        if (body.startsWith("track"))
        {
            DatasetParserUtils.parseTrackLine(body, dataset.getTrackProperties());
            return;
        }

        // Only a single "key=value" pair is recognized.
        final String[] keyValue = body.split("=");
        if (keyValue.length != 2)
        {
            return;
        }

        final String key = keyValue[0].trim().toLowerCase();
        final String value = keyValue[1].trim();

        if ("name".equals(key))
        {
            dataset.setName(value);
        } else if ("type".equals(key))
        {
            try
            {
                dataset.setTrackType(TrackType.valueOf(value.toUpperCase()));
            } catch (Exception exception)
            {
                // Ignore
            }
        }
    }

    /**
     * Tells whether a file name carries one of the copy-number extensions
     * (".cn", ".xcn" or ".snp"), ignoring a trailing ".txt".
     *
     * @param filename the file name or path to test
     * @return true if the name ends in a copy-number extension
     */
    private boolean isCopyNumberFileExt(String filename) {
        String stem = filename;
        if (stem.endsWith(".txt"))
        {
            stem = stem.substring(0, stem.length() - 4);
        }

        for (String ext : new String[] {".cn", ".xcn", ".snp"})
        {
            if (stem.endsWith(ext))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Return the sample headings for the copy number file.
     * <p>
     * Headings are taken from the header tokens starting at the first data
     * column, stepping by {@code skipColumns}.  The count is rounded up so that a
     * header whose data-column count is not a multiple of {@code skipColumns}
     * still gets an array slot for its last heading, and a header with no data
     * columns at all yields an empty array.
     *
     * @param tokens      the tab-separated tokens of the header line
     * @param skipColumns stride between consecutive heading columns
     * @return the trimmed headings, in column order; never null
     */
    public String[] getHeadings(String[] tokens, int skipColumns) {

        // Clamp at zero: a header shorter than firstDataColumn has no headings.
        int nDataTokens = Math.max(0, tokens.length - firstDataColumn);

        // Ceiling division, matching the number of columns visited by the stride.
        int nHeadings = (nDataTokens + skipColumns - 1) / skipColumns;

        String[] headings = new String[nHeadings];
        for (int idx = 0; idx < nHeadings; idx++)
        {
            headings[idx] = tokens[firstDataColumn + idx * skipColumns].trim();
        }

        return headings;

    }
    /**
     * Scratch buffer that ParsingUtils.split fills with the tokens of the row
     * currently being parsed in scan() and loadChromosomeData().
     * NOTE(review): the buffer is static and mutable, so it is shared by every
     * parser instance, and no synchronization is visible in this file -- confirm
     * that parsers never run concurrently.  Rows are presumably expected to have
     * at most 10000 tab-separated tokens; verify how ParsingUtils.split handles more.
     */
    static String[] tokens = new String[10000];

    /**
     * Return a rough estimate of the number of rows in the file.  Used to estimate
     * sampling frequency for whole genome view.
     * <p>
     * The estimate is the file size divided by the number of tab-separated columns
     * in the first line times an assumed number of bytes per column (6 for ".cn"
     * files, 4.4 otherwise).  This is a best-effort figure: if the file cannot be
     * read, is empty, or its first line has no columns, a default of 250000 is
     * returned.
     *
     * @param filename path of the file to examine
     *
     * @return the estimated row count, or 250000 when no estimate can be made
     */
    public int estimateNumberOfRows(String filename) {
        final int defaultEstimate = 250000;

        File f = new File(filename);
        BufferedReader reader = null;
        long size = f.length();
        try
        {
            reader = new BufferedReader(new FileReader(filename));
            String firstLine = reader.readLine();
            if (firstLine == null)
            {
                // Empty file: nothing to base an estimate on.
                return defaultEstimate;
            }

            // split() drops trailing empty strings, so a line made only of tabs
            // yields zero columns; dividing by that would saturate the estimate.
            int nColumns = firstLine.split("\t").length;
            if (nColumns == 0)
            {
                return defaultEstimate;
            }

            double factor = filename.toLowerCase().endsWith(".cn") ? 6 : 4.4;
            return (int) (size / (nColumns * factor));
        } catch (Exception ex)
        {
            // Best effort only: any failure falls back to the default estimate.
            log.error("Error estimating number of rows in: " + filename, ex);
            return defaultEstimate;
        } finally
        {
            // The reader is still null when the file could not be opened.
            if (reader != null)
            {
                try
                {
                    reader.close();
                } catch (IOException ex)
                {
                    log.error("Error closing: " + filename, ex);
                }
            }
        }
    }

    /**
     * Position the stream at the specified position.
     * <p>
     * A FileInputStream is positioned absolutely through its channel.  Any other
     * stream is advanced by {@code position} bytes from wherever it currently
     * is; since a single {@code skip} call may skip fewer bytes than asked, the
     * skip is repeated until the full distance has been covered.
     *
     * @param is       the stream to position
     * @param position the target offset in bytes
     * @throws java.io.IOException if an I/O error occurs, or the stream ends
     *                             before the requested position is reached
     */
    private void position(InputStream is, long position) throws IOException {
        // This is ugly, but most streams are filestreams and we want to take advantage of
        // the file channel
        if (is instanceof FileInputStream)
        {
            ((FileInputStream) is).getChannel().position(position);
            return;
        }

        long remaining = position;
        while (remaining > 0)
        {
            long skipped = is.skip(remaining);
            if (skipped > 0)
            {
                remaining -= skipped;
            } else if (is.read() >= 0)
            {
                // skip() made no progress without being at the end of the stream;
                // consuming one byte tells a stall apart from end of stream.
                remaining--;
            } else
            {
                throw new IOException("Unexpected end of stream: " + remaining
                    + " bytes short of position " + position);
            }
        }
    }

    /**
     * Pushes the data collected for one chromosome into the dataset's genome
     * summary: first the locations, then the values of each heading in turn.
     *
     * @param currentChromosome name of the chromosome the data belongs to
     * @param dataset           dataset whose genome summary is updated
     * @param headings          headings whose values are transferred
     * @param sumData           the locations and values collected for the chromosome
     */
    private void updateWholeGenome(String currentChromosome, IGVDataset dataset, String[] headings, IGVDatasetParser.WholeGenomeData sumData) {
        dataset.getGenomeSummary().addLocations(currentChromosome, sumData.locations.toArray());

        for (int i = 0; i < headings.length; i++)
        {
            String heading = headings[i];
            FloatArrayList values = sumData.data.get(heading);
            dataset.getGenomeSummary().addData(heading, currentChromosome, values.toArray());
        }
    }

    /**
     * Holder for the locations and per-heading values collected while scanning
     * one chromosome.  Every heading gets its own value list on construction.
     */
    class WholeGenomeData {

        String[] headings;
        IntArrayList locations;
        Map<String, FloatArrayList> data;

        /**
         * @param headings the headings for which value lists are created
         */
        WholeGenomeData(String[] headings) {
            this.locations = new IntArrayList(25000);
            this.data = new HashMap<String, FloatArrayList>();
            this.headings = headings;

            for (int i = 0; i < headings.length; i++)
            {
                this.data.put(headings[i], new FloatArrayList(25000));
            }
        }

        /**
         * @return the number of locations collected so far
         */
        int size() {
            return this.locations.size();
        }
    }
}
