/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2009) by the Broad Institute/Massachusetts Institute
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
 */
package org.broad.igv.feature;

//~--- imports ----------------------------------------------------------------
import java.io.BufferedReader;
import org.apache.log4j.Logger;

import org.broad.igv.util.ResourceLocator;
import org.broad.igv.renderer.BasicFeatureRenderer;
import org.broad.igv.renderer.GeneTrackRenderer;
import org.broad.igv.track.FeatureTrack;
import java.io.IOException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.broad.igv.util.AsciiLineReader;

/**
 * Parses GFF3 feature files into IGV features.
 *
 * <p>Records typed {@code gene}, {@code mRNA} or {@code transcript} that carry an
 * {@code ID} attribute are cached while the file is read. Records typed {@code exon},
 * {@code CDS}, {@code five_prime_UTR} or {@code three_prime_UTR} are attached to the
 * transcript(s) named in their {@code Parent} attribute. After the last line has been
 * read, every transcript for which an mRNA/transcript record was seen is assembled into
 * one feature. All remaining records become stand-alone features.
 *
 * <p>Parse state is held in instance fields, so a single instance must not be used by
 * several threads at the same time.
 */
public class GFF3Parser implements FeatureParser {

    static Logger log = Logger.getLogger(GFF3Parser.class);

    /** Number of tab-separated columns a GFF record must have to be parsed. */
    private static final int GFF_COLUMN_COUNT = 9;

    /** Size of the scratch array handed to {@code ParsingUtils.split} for one line. */
    private static final int TOKEN_BUFFER_SIZE = 20;

    /** Transcripts seen during the current parse, keyed by transcript ID. */
    Map<String, GFF3Transcript> transcriptCache = new HashMap<String, GFF3Transcript>(50000);

    /** Gene features seen during the current parse, keyed by gene ID. */
    Map<String, BasicFeature> geneCache = new HashMap<String, BasicFeature>(50000);

    /**
     * Legacy shared split buffer. This class no longer uses it (every parse allocates its
     * own buffer); it is retained only so existing references to the field keep compiling.
     */
    static String[] tokens = new String[TOKEN_BUFFER_SIZE];

    /** Attribute keys that are left out of the generated description text. */
    static Set<String> ignoreAttributes = new HashSet<String>();

    static {
        ignoreAttributes.add("ID");
        ignoreAttributes.add("Parent");
        ignoreAttributes.add("wormprep");
    }

    /** Attribute keys tried, in this order, when looking for a feature name. */
    static String[] nameFields = {"Name", "name", "Locus", "locus"};

    /**
     * By definition this is a feature file.
     *
     * @param locator the resource in question (not inspected)
     * @return always {@code true}
     */
    public boolean isFeatureFile(ResourceLocator locator) {
        return true;
    }

    /**
     * Reads the resource and wraps all parsed features in a single track.
     *
     * @param locator the resource to read
     * @return a one-element list holding the new track, or {@code null} if an
     *         {@link IOException} was raised while opening or reading the resource
     */
    public List<FeatureTrack> loadTracks(ResourceLocator locator) {

        AsciiLineReader reader = null;
        try {
            reader = ParsingUtils.openAsciiReader(locator);

            List<Feature> features = loadFeatures(reader);

            FeatureTrack track = new FeatureTrack(locator, locator.getDisplayName(), features);

            // NOTE(review): this renderer is replaced by GeneTrackRenderer a few lines below;
            // the call is kept in case the height setters depend on a renderer being set --
            // confirm against FeatureTrack and drop it if not needed.
            track.setRendererClass(BasicFeatureRenderer.class);
            track.setMinimumHeight(35);
            track.setHeight(45);
            track.setRendererClass(GeneTrackRenderer.class);

            List<FeatureTrack> tracks = new ArrayList<FeatureTrack>();
            tracks.add(track);
            return tracks;

        } catch (IOException ex) {
            // Report through the class logger rather than printing to stderr.
            log.error("Error loading GFF3 tracks", ex);
            return null;
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Parses every record supplied by the reader.
     *
     * <p>Lines starting with {@code #}, lines with fewer than nine tab-separated columns
     * and lines whose start/end columns are not integers are skipped. If reading fails
     * with an {@link IOException} the error is logged, transcript assembly is skipped and
     * the stand-alone features collected up to that point are returned.
     *
     * @param reader source of GFF3 lines; it is not closed by this method
     * @return the parsed features, never {@code null}
     */
    public List<Feature> loadFeatures(AsciiLineReader reader) {
        List<Feature> features = new ArrayList<Feature>();

        // Start from empty caches so that reusing this parser for another file does not
        // re-emit transcripts (or reuse genes) left over from an earlier call.
        transcriptCache.clear();
        geneCache.clear();

        // Per-call scratch buffer: a buffer shared between instances would be overwritten
        // by any other parse running at the same time.
        String[] columns = new String[TOKEN_BUFFER_SIZE];

        try {
            String line;
            while ((line = reader.readLine()) != null) {

                if (line.startsWith("#")) {
                    continue;
                }

                int nTokens = ParsingUtils.split(line, columns, '\t');
                if (nTokens < GFF_COLUMN_COUNT) {
                    continue;
                }

                parseRecord(columns, features);
            }

            // Create and add IGV genes
            for (GFF3Transcript transcript : transcriptCache.values()) {
                BasicFeature igvGene = transcript.createIGVGene();
                if (igvGene != null) {
                    features.add(igvGene);
                }
            }

        } catch (IOException ex) {
            log.error("Error reading GFF3 data; transcript assembly was skipped", ex);
        }

        return features;
    }

    /**
     * Interprets one split GFF record and either caches it (gene, transcript, transcript
     * part) or appends it to {@code features}.
     *
     * @param columns  the columns of the record; at least nine entries are populated
     * @param features receives stand-alone features
     */
    private void parseRecord(String[] columns, List<Feature> features) {
        String featureType = columns[2].trim();

        // GFF coordinates are 1-based inclusive (length = end - start + 1)
        // IGV (UCSC) coordinates are 0-based exclusive.  Adjust start and end accordingly
        int start;
        int end;
        try {
            start = Integer.parseInt(columns[3].trim()) - 1;
            end = Integer.parseInt(columns[4].trim());
        } catch (NumberFormatException e) {
            // One malformed record should not abort the whole file.
            log.error("GFF3 error: non numeric coordinates: " + columns[3] + ", " + columns[4]);
            return;
        }

        // The copy is presumably there so the stored name does not share storage with the
        // (much longer) input line -- kept as in the original.
        String chromosome = new String(columns[0].trim());
        Strand strand = convertStrand(columns[6].trim());
        Map<String, String> attributes = parseDescription(columns[8]);
        String description = getDescription(attributes);

        if (isTranscriptPart(featureType)) {
            Exon exon = new Exon(chromosome, start, end, strand);
            exon.setDescription(description);
            applyPhase(exon, columns[7].trim());
            attachToParents(exon, featureType, attributes.get("Parent"));
            return;
        }

        BasicFeature feature = new BasicFeature(chromosome, start, end, strand);
        feature.setName(getName(attributes));
        feature.setDescription(description);

        String id = attributes.get("ID");
        if (id == null) {
            features.add(feature);
            return;
        }

        feature.setIdentifier(id);
        if (featureType.equals("gene")) {
            geneCache.put(id, feature);
        } else if (featureType.equals("mRNA") || featureType.equals("transcript")) {
            String parentId = attributes.get("Parent");
            if (parentId != null) {
                parentId = parentId.trim();
            }
            getGFF3Transcript(id).setMRna(feature, parentId);
        } else {
            features.add(feature);
        }
    }

    /** Tells whether the feature type is one that gets attached to a parent transcript. */
    private static boolean isTranscriptPart(String featureType) {
        return featureType.equals("five_prime_UTR")
                || featureType.equals("three_prime_UTR")
                || featureType.equals("exon")
                || featureType.equals("CDS");
    }

    /** Sets the exon phase from the phase column; "." and non-numeric values leave it unset. */
    private void applyPhase(Exon exon, String phaseString) {
        if (phaseString.equals(".")) {
            return;
        }
        try {
            exon.setPhase(Integer.parseInt(phaseString));
        } catch (NumberFormatException numberFormatException) {
            // Just skip setting the phase
            log.error("GFF3 error: non numeric phase: " + phaseString);
        }
    }

    /**
     * Registers a transcript part with every transcript listed in its Parent attribute.
     * Parts without a Parent attribute are dropped (orphaned coding regions are not
     * turned into features).
     *
     * @param part         the exon/CDS/UTR region; the same instance is shared by all parents
     * @param featureType  one of the types accepted by {@link #isTranscriptPart(String)}
     * @param parentString comma-separated parent IDs, or {@code null}
     */
    private void attachToParents(Exon part, String featureType, String parentString) {
        if (parentString == null) {
            // Orphaned coding region.
            // Should these be added as distinct features?
            return;
        }

        for (String rawId : parentString.split(",")) {
            // Trim so that "a, b" refers to the same transcripts as "a,b".
            String pid = rawId.trim();
            if (pid.length() == 0) {
                continue;
            }
            GFF3Transcript transcript = getGFF3Transcript(pid);
            if (featureType.equals("exon")) {
                transcript.addExon(part);
            } else if (featureType.equals("CDS")) {
                transcript.addCDS(part);
            } else if (featureType.equals("five_prime_UTR")) {
                // NOTE(review): only one 5' UTR is kept per transcript; a later record
                // replaces an earlier one -- confirm that multi-segment UTRs need no support.
                transcript.fivePrimeUTR = part;
            } else if (featureType.equals("three_prime_UTR")) {
                // Same single-slot behaviour as the 5' UTR above.
                transcript.threePrimeUTR = part;
            }
        }
    }

    /** Maps the strand column to a {@link Strand}; anything but "+" or "-" is NONE. */
    private Strand convertStrand(String strandString) {
        if (strandString.equals("-")) {
            return Strand.NEGATIVE;
        }
        if (strandString.equals("+")) {
            return Strand.POSITIVE;
        }
        return Strand.NONE;
    }

    /**
     * Builds the description text: one {@code key = value<br>} entry per attribute whose
     * key is not in {@link #ignoreAttributes}. Entry order follows the map's iteration order.
     */
    private String getDescription(Map<String, String> attributes) {
        StringBuilder buf = new StringBuilder();
        for (Map.Entry<String, String> att : attributes.entrySet()) {
            if (!ignoreAttributes.contains(att.getKey())) {
                buf.append(att.getKey()).append(" = ").append(att.getValue());
                buf.append("<br>");
            }
        }
        return buf.toString();
    }

    /** Returns the cached transcript for the ID, creating and caching it on first use. */
    private GFF3Transcript getGFF3Transcript(String id) {
        GFF3Transcript transcript = transcriptCache.get(id);
        if (transcript == null) {
            transcript = new GFF3Transcript(id);
            transcriptCache.put(id, transcript);
        }
        return transcript;
    }

    /**
     * Try to find a reasonable name from the attributes.
     *
     * @return the value of the first key from {@link #nameFields} that is present,
     *         otherwise the part of the {@code note} attribute before its first comma,
     *         otherwise {@code null}
     */
    private String getName(Map<String, String> attributes) {
        for (String nf : nameFields) {
            String value = attributes.get(nf);
            if (value != null) {
                return value;
            }
        }

        // Try "note" as a last resort
        String note = attributes.get("note");
        if (note != null) {
            // indexOf instead of split(",")[0]: split yields an empty array for a value
            // made up only of commas, which would throw on [0].
            int comma = note.indexOf(',');
            return (comma < 0) ? note : note.substring(0, comma);
        }
        return null;
    }

    /**
     * Splits the attribute column ({@code key=value;key=value;...}) into a map.
     *
     * <p>Keys and values are trimmed. A segment without {@code =} maps its text to the
     * empty string. Blank segments (for example after a trailing {@code ;}) are ignored.
     * Everything after the first {@code =} of a segment is treated as the value.
     *
     * @param description the raw attribute column; may be {@code null}
     * @return the parsed attributes, never {@code null}
     */
    private Map<String, String> parseDescription(String description) {
        Map<String, String> kvalues = new HashMap<String, String>();
        if (description == null) {
            return kvalues;
        }

        for (String segment : description.split(";")) {
            String kv = segment.trim();
            if (kv.length() == 0) {
                continue;
            }
            int eq = kv.indexOf('=');
            if (eq < 0) {
                kvalues.put(kv, "");
            } else {
                kvalues.put(kv.substring(0, eq).trim(), kv.substring(eq + 1).trim());
            }
        }
        return kvalues;
    }

    /**
     * Collects the pieces of one transcript (mRNA record, exons, CDS regions, UTRs) until
     * the file has been read, then merges them into a single feature.
     */
    class GFF3Transcript {

        private String id;
        private Set<Exon> exons = new HashSet<Exon>();
        private List<Exon> cdss = new ArrayList<Exon>();
        private Exon fivePrimeUTR;
        private Exon threePrimeUTR;
        private BasicFeature mRNA;
        private String parentId;

        GFF3Transcript(String id) {
            this.id = id;
        }

        /**
         * Records the mRNA/transcript feature and its parent gene ID. A missing name
         * falls back to the feature's identifier, and a prefix ending in ':' (when the
         * ':' is not the first character) is stripped from the name.
         */
        void setMRna(BasicFeature mRNA, String parent) {
            this.mRNA = mRNA;
            this.parentId = parent;
            if (mRNA.getName() == null) {
                mRNA.setName(mRNA.getIdentifier());
            }
            int prefixIndex = mRNA.getName().indexOf(":");
            if (prefixIndex > 0) {
                mRNA.setName(mRNA.getName().substring(prefixIndex + 1));
            }
        }

        void addExon(Exon exon) {
            exons.add(exon);
        }

        void addCDS(Exon cds) {
            cdss.add(cds);
        }

        /**
         * Create an IGV "gene (transcript)" from a GFF mRNA (transcript).
         *
         * @return the mRNA feature with its exons attached, or {@code null} when no
         *         mRNA/transcript record was seen for this ID
         */
        BasicFeature createIGVGene() {

            if (mRNA == null) {
                return null;
            }

            // NOTE(review): the gene is removed from the cache on first use, so when several
            // transcripts share a parent only the first one processed picks up the gene's
            // name and description -- confirm whether that is intended.
            BasicFeature gene = (parentId == null) ? null : geneCache.remove(parentId);
            if (gene != null) {
                if (gene.getName() != null) {
                    mRNA.setName(gene.getName());
                }
                mRNA.setDescription("Gene<br>" + gene.getDescription() + "<br>--------<br>Transcript<br>" + mRNA.getDescription());
            }

            // Combine exon & cds: a CDS inside a known exon marks that exon's coding
            // region; a CDS without an enclosing exon is added as an exon itself.
            // NOTE(review): exons that enclose no CDS stay in the set and are never added
            // to the mRNA -- confirm this is intended for non-coding exons.
            for (Exon cds : cdss) {
                Exon exon = findMatchingExon(cds);
                if (exon == null) {
                    mRNA.addExon(cds);
                } else {
                    exon.setCodingStart(cds.getStart());
                    exon.setCodingEnd(cds.getEnd());
                    exon.setReadingFrame(cds.getReadingShift());
                    mRNA.addExon(exon);
                    exons.remove(exon);
                }
            }
            cdss.clear();

            if (fivePrimeUTR != null) {
                fivePrimeUTR.setUTR(true);
                mRNA.addExon(fivePrimeUTR);
                // Trim a remaining exon that encloses the UTR so it no longer covers it.
                Exon exon = findMatchingExon(fivePrimeUTR);
                if (exon != null) {
                    if (exon.getStrand() == Strand.POSITIVE) {
                        exon.setStart(fivePrimeUTR.getEnd());
                    } else {
                        exon.setEnd(fivePrimeUTR.getStart());
                    }
                }
            }

            if (threePrimeUTR != null) {
                threePrimeUTR.setUTR(true);
                mRNA.addExon(threePrimeUTR);
                // Mirror image of the 5' case: trim from the other end.
                Exon exon = findMatchingExon(threePrimeUTR);
                if (exon != null) {
                    if (exon.getStrand() == Strand.POSITIVE) {
                        exon.setEnd(threePrimeUTR.getStart());
                    } else {
                        exon.setStart(threePrimeUTR.getEnd());
                    }
                }
            }

            mRNA.sortExons();

            return mRNA;
        }

        /** Returns an exon still held by this transcript that contains the region, or null. */
        Exon findMatchingExon(Feature cds) {
            for (Exon exon : exons) {
                if (exon.contains(cds)) {
                    return exon;
                }
            }
            return null;
        }
    }
}
