/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2008) by the Broad Institute/Massachusetts Institute
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
 */






package org.broad.igv.preprocess;

//~--- non-JDK imports --------------------------------------------------------

import org.broad.igv.feature.Genome;
import org.broad.igv.feature.GenomeManager;
import org.broad.igv.feature.ParsingUtils;

//~--- JDK imports ------------------------------------------------------------

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import java.util.HashMap;
import java.util.Map;

/**
 * Parses a text alignment file and bins its reads into per-chromosome count arrays.
 *
 * <p>Two line formats are supported, selected from the file name at construction time:
 * <ul>
 *   <li>{@code ALIGNED} (default): tab separated columns {@code chr, start, end [, strand]}.</li>
 *   <li>{@code REALIGNED} (file name ends with "realign" or "realign.txt"): space separated
 *       columns where column 0 is the read sequence, column 3 is {@code chr:start} (the
 *       chromosome is prefixed with "chr") and column 4 is the strand ("F" means forward,
 *       anything else is treated as reverse).</li>
 * </ul>
 *
 * @author jrobinso
 */
public class AlignmentFileParser {

    enum FileType { ALIGNED, REALIGNED }

    /** Chromosome length assumed when no genome is supplied or the chromosome is unknown to it. */
    private static final int DEFAULT_CHR_LENGTH = 250000000;

    /** Capacity of the token buffer that is reused for every line. */
    private static final int MAX_TOKENS = 20;

    /** Minimum number of columns required on an ALIGNED line (chr, start, end). */
    private static final int MIN_ALIGNED_TOKENS = 3;

    /** Minimum number of columns required on a REALIGNED line (the strand is column index 4). */
    private static final int MIN_REALIGNED_TOKENS = 5;

    /**
     * Format detected by the most recently constructed parser.  Still assigned by the
     * constructor so that any existing code reading this static keeps working, but parsing
     * itself uses the per-instance {@link #fileType} so that several parsers for different
     * formats can coexist.
     */
    static FileType type = FileType.ALIGNED;

    File file;

    /** Format of the file handled by this particular parser instance. */
    private final FileType fileType;

    /** Scratch buffer for splitting a "chr:start" locus; per instance rather than shared. */
    private final String[] locusBuf = new String[2];

    /**
     * Creates a parser for the given file.  The format is inferred from the file name:
     * names ending with "realign" or "realign.txt" are REALIGNED, everything else is ALIGNED.
     *
     * @param file the alignment file to parse
     */
    public AlignmentFileParser(File file) {
        this.file = file;
        String name = file.getName();
        this.fileType = (name.endsWith("realign") || name.endsWith("realign.txt"))
                        ? FileType.REALIGNED : FileType.ALIGNED;
        type = this.fileType;
    }

    /**
     * Creates a map of chromosome -> read counts from the alignment file.
     *
     * <p>Each read is extended by {@code extensionFactor} in its strand direction and every
     * bin of width {@code resolution} that it overlaps is incremented.  Bins beyond the end
     * of the count array are folded into the last bin.  Counts saturate at
     * {@link Short#MAX_VALUE} instead of wrapping around to negative values.
     *
     * <p>Lines starting with '#' are skipped.  Parsing stops at the first blank line or at
     * the end of the file.  Lines with too few columns are ignored; lines that fail to parse
     * are reported on standard output and skipped.  An I/O error is reported and whatever
     * was counted up to that point is returned.
     *
     * @param resolution         bin width in bases; must be positive
     * @param extensionFactor    number of bases by which each read is extended
     * @param genomeId           optional genome id; when non-null the genome is used to look
     *                           up chromosome lengths so the count arrays can be sized more
     *                           efficiently
     * @param lastPositionPerChr updated in place with the largest (extended) end position
     *                           seen per chromosome; may be null, in which case nothing is
     *                           recorded
     * @return map from chromosome name to its array of bin counts (never null)
     * @throws IllegalArgumentException if {@code resolution} is not positive
     */
    public Map<String, short[]> doReadCounts(int resolution, int extensionFactor,
            String genomeId, Map<String, Integer> lastPositionPerChr) {

        if (resolution <= 0) {
            throw new IllegalArgumentException("resolution must be positive: " + resolution);
        }

        // Optional.  If supplied the genome can be used to get chr lengths, which in
        // turn can be used to size arrays more efficiently.
        Genome genome = (genomeId == null)
                        ? null : GenomeManager.getInstance().getGenome(genomeId);

        Map<String, short[]> readCountMap = new HashMap<String, short[]>();

        // Both values depend only on the file format, so compute them once.
        boolean aligned = (fileType == FileType.ALIGNED);
        char separator = aligned ? '\t' : ' ';
        int minTokens = aligned ? MIN_ALIGNED_TOKENS : MIN_REALIGNED_TOKENS;

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

            // Buffer to hold tokens; reused for every line.
            String[] tokens = new String[MAX_TOKENS];
            String nextLine;

            // A blank line terminates parsing, exactly like end of file.
            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                nextLine = nextLine.trim();
                if (nextLine.startsWith("#")) {
                    continue;
                }

                int nTokens = ParsingUtils.split(nextLine, tokens, separator);
                if (nTokens < minTokens) {
                    // Too few columns: entries past nTokens in the reused buffer may be
                    // stale values from an earlier line, so never read them.
                    continue;
                }

                try {
                    LocusInfo locus = aligned ? getLocusInfo(tokens, nTokens)
                                              : getLocusInfoSolexa(tokens, nTokens);
                    extendRead(locus, extensionFactor);
                    recordLastPosition(lastPositionPerChr, locus);

                    short[] counts = getCounts(readCountMap, genome, locus.chr, resolution);
                    incrementCounts(counts, locus, resolution);
                } catch (Exception e) {
                    // Best effort: report the malformed line and carry on with the next one.
                    System.out.println("Error: " + e.getMessage() + "  Skipping line: " + nextLine);
                    e.printStackTrace();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The reader is still null if the file could not be opened.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return readCountMap;
    }

    /** Extends the read by {@code extensionFactor} in its strand direction; start never drops below 0. */
    private static void extendRead(LocusInfo locus, int extensionFactor) {
        if (locus.strand.equalsIgnoreCase("+")) {
            locus.end = locus.end + extensionFactor;
        } else if (locus.strand.equalsIgnoreCase("-")) {
            locus.start = Math.max(0, locus.start - extensionFactor);
        }
    }

    /** Records the largest end position seen so far for the chromosome of {@code locus}. */
    private static void recordLastPosition(Map<String, Integer> lastPositionPerChr, LocusInfo locus) {
        if (lastPositionPerChr == null) {
            return;
        }
        Integer previous = lastPositionPerChr.get(locus.chr);
        int lastPosition = (previous == null) ? 0 : previous.intValue();
        lastPositionPerChr.put(locus.chr, Integer.valueOf(Math.max(lastPosition, locus.end)));
    }

    /** Returns the count array for {@code chr}, allocating and registering it on first use. */
    private static short[] getCounts(Map<String, short[]> readCountMap, Genome genome,
            String chr, int resolution) {
        short[] counts = readCountMap.get(chr);
        if (counts == null) {
            int chrLength = DEFAULT_CHR_LENGTH;
            if ((genome != null) && (genome.getChromosome(chr) != null)) {
                chrLength = genome.getChromosome(chr).getLength();
            }
            // 10% head room; always at least one bin so the index clamping below stays valid.
            int nPoints = Math.max(1, (int) (1.1 * chrLength / resolution));
            counts = new short[nPoints];
            readCountMap.put(chr, counts);
        }
        return counts;
    }

    /** Increments every bin overlapped by {@code locus}, saturating at {@link Short#MAX_VALUE}. */
    private static void incrementCounts(short[] counts, LocusInfo locus, int resolution) {
        // An index > the counts array length indicates data beyond the chromosome end;
        // such data is accumulated in the last bin.
        int maxIndex = counts.length - 1;
        int startIndex = Math.max(0, Math.min(maxIndex, locus.start / resolution));
        int endIndex = Math.min(maxIndex, locus.end / resolution);

        for (int i = startIndex; i <= endIndex; i++) {
            if (counts[i] < Short.MAX_VALUE) {
                counts[i]++;
            }
        }
    }

    /**
     * Builds a locus from an ALIGNED line: {@code chr, start, end [, strand]}.
     * The strand defaults to "." when the fourth column is absent.
     *
     * @param tokens  the columns of the line
     * @param nTokens number of valid entries in {@code tokens}
     * @return the parsed locus
     * @throws NumberFormatException if start or end is not an integer
     */
    private LocusInfo getLocusInfo(String[] tokens, int nTokens) {
        String chr = tokens[0];
        int start = Integer.parseInt(tokens[1]);
        int end = Integer.parseInt(tokens[2]);
        String strand = (nTokens > 3) ? tokens[3] : ".";
        return new LocusInfo(chr, start, end, strand);
    }

    /**
     * Builds a locus from a REALIGNED line, for example
     * <pre>
     * GTTACATTTTTCAGTATAAAGTTAGTGTAGTATTC 17500 1 9:34004972 R GAATACTACACTAACTTTATACTGAAAAATGTAAC 14359
     * </pre>
     * The end position is the start plus the length of the read sequence in column 0.
     *
     * @param tokens  the columns of the line
     * @param nTokens number of valid entries in {@code tokens}
     * @return the parsed locus
     * @throws IllegalArgumentException if the line has fewer than five columns or the locus
     *                                  column is not of the form {@code chr:start}
     * @throws NumberFormatException    if the start position is not an integer
     */
    private LocusInfo getLocusInfoSolexa(String[] tokens, int nTokens) {
        if (nTokens < MIN_REALIGNED_TOKENS) {
            throw new IllegalArgumentException(
                "Expected at least " + MIN_REALIGNED_TOKENS + " columns but found " + nTokens);
        }
        String sequence = tokens[0];
        String locusString = tokens[3];
        String strandString = tokens[4];

        // Without this check a locus lacking ':' would silently reuse the position that a
        // previous line left in the buffer.
        int nParts = ParsingUtils.split(locusString, locusBuf, ':');
        if (nParts < 2) {
            throw new IllegalArgumentException("Malformed locus (expected chr:start): " + locusString);
        }

        String chr = "chr" + locusBuf[0];
        int start = Integer.parseInt(locusBuf[1]);
        int end = start + sequence.length();
        String strand = strandString.equals("F") ? "+" : "-";

        return new LocusInfo(chr, start, end, strand);
    }

    /** Mutable holder for the chromosome, start, end and strand of a single read. */
    static class LocusInfo {
        String chr;
        int start;
        int end;
        String strand;

        /**
         * Creates a locus.
         *
         * @param chr    chromosome name
         * @param start  start position
         * @param end    end position
         * @param strand "+", "-" or "." when unknown
         */
        public LocusInfo(String chr, int start, int end, String strand) {
            this.chr = chr;
            this.start = start;
            this.end = end;
            this.strand = strand;
        }
    }
}
