/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2009) by the Broad Institute/Massachusetts Institute
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
 */
package org.broad.igv.feature;

//~--- JDK imports ------------------------------------------------------------
import java.util.List;

/**
 * Parses single lines of tab-delimited UCSC gene tables into {@link BasicFeature}
 * objects, optionally decorated with exons.
 *
 * <p>Three column layouts are supported, selected through {@link Type}:</p>
 * <ul>
 *   <li>{@code REFFLAT}: name(0) id(1) chr(2) strand(3) start(4) end(5) cdStart(6)
 *       cdEnd(7) exonCount(8) exonStarts(9) exonEnds(10)</li>
 *   <li>{@code GENEPRED}: same as {@code REFFLAT} except that the name is read from
 *       column 12 and, when present, exon frames are read from column 15</li>
 *   <li>{@code UCSCGENE}: id(0) chr(1) strand(2) start(3) end(4) cdStart(5) cdEnd(6)
 *       exonCount(7) exonStarts(8) exonEnds(9) name(10)</li>
 * </ul>
 *
 * @author jrobinso
 */
public class UCSCGeneTableParser extends UCSCParser {

    /** Column holding the comma separated exon frames in a GENEPRED table. */
    private static final int GENEPRED_FRAME_COLUMN = 15;

    /** Number of bases in a codon; used to convert an exon frame into a phase. */
    private static final int CODON_LENGTH = 3;

    // Column indices. The initial values describe the REFFLAT layout; the
    // constructor overrides them for the other table types.
    private int cdEndColumn = 7;
    private int cdStartColumn = 6;
    private int chrColumn = 2;
    private int endColumn = 5;
    private int endsBufferColumn = 10;
    private int exonCountColumn = 8;
    private int idColumn = 1;
    private int nameColumn = 0;
    private int startColumn = 4;
    private int startsBufferColumn = 9;
    private int strandColumn = 3;

    /** The table layouts understood by this parser. */
    public enum Type {

        REFFLAT, GENEPRED, UCSCGENE
    }

    private final Type type;

    /**
     * Creates a parser configured for the column layout of the given table type.
     *
     * @param type the table layout to parse
     */
    public UCSCGeneTableParser(Type type) {
        this.type = type;
        switch (type) {
            case REFFLAT:
                // The field initializers already describe this layout.
                break;
            case UCSCGENE:
                //#name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	proteinID
                //  0      1       2        3      4        5          6         7          8          9           10
                idColumn = 0;
                chrColumn = 1;
                strandColumn = 2;
                startColumn = 3;
                endColumn = 4;
                cdStartColumn = 5;
                cdEndColumn = 6;
                exonCountColumn = 7;
                startsBufferColumn = 8;
                endsBufferColumn = 9;
                nameColumn = 10;
                break;
            case GENEPRED:
                // Same layout as REFFLAT, but the display name lives in column 12.
                nameColumn = 12;
                break;
        }
    }

    /**
     * Converts one tokenized table line into a feature.
     *
     * <p>The name falls back to the identifier when the name column is absent,
     * {@code null} or empty. Exons are added only when the line is long enough to
     * contain every exon related column.</p>
     *
     * @param tokens     the column values of the line
     * @param tokenCount the number of valid entries in {@code tokens}
     * @return the parsed feature, or {@code null} when the line does not contain
     *         all mandatory columns (id, chr, strand, start, end)
     * @throws NumberFormatException if a coordinate or exon column is not an integer
     */
    @Override
    protected Feature parseLine(String[] tokens, int tokenCount) {

        // Every mandatory column is read unconditionally below, so all of them
        // must be present, not just the strand column.
        if (tokenCount <= lastRequiredColumn()) {
            return null;
        }

        String identifier = new String(tokens[idColumn].trim());
        String name = null;
        if (tokenCount > nameColumn && tokens[nameColumn] != null) {
            name = new String(tokens[nameColumn]);
        }

        // Fall back to the identifier when no usable name was supplied.
        if (name == null || name.length() == 0) {
            name = identifier;
        }

        String chr = tokens[chrColumn].trim().intern();
        int start = Integer.parseInt(tokens[startColumn]);
        int end = Integer.parseInt(tokens[endColumn]);
        Strand strand = parseStrand(tokens[strandColumn]);

        BasicFeature gene = new BasicFeature(chr, start, end, strand);
        gene.setName(name);
        gene.setIdentifier(identifier);

        // Coding information is optional; only read it when every column it
        // needs is actually present in this line.
        if (tokenCount > lastExonColumn()) {
            createExons(tokens, tokenCount, gene, chr, strand);
        }
        return gene;
    }

    /** Returns the highest index among the columns every line must provide. */
    private int lastRequiredColumn() {
        int last = Math.max(idColumn, chrColumn);
        last = Math.max(last, strandColumn);
        last = Math.max(last, startColumn);
        return Math.max(last, endColumn);
    }

    /** Returns the highest index among the columns read while building exons. */
    private int lastExonColumn() {
        int last = Math.max(cdStartColumn, cdEndColumn);
        last = Math.max(last, exonCountColumn);
        last = Math.max(last, startsBufferColumn);
        return Math.max(last, endsBufferColumn);
    }

    /** Maps "+" / "-" (ignoring surrounding whitespace) to a strand; anything else is NONE. */
    private static Strand parseStrand(String strandString) {
        if (strandString == null) {
            return Strand.NONE;
        }
        String trimmed = strandString.trim();
        if (trimmed.equals("+")) {
            return Strand.POSITIVE;
        }
        if (trimmed.equals("-")) {
            return Strand.NEGATIVE;
        }
        return Strand.NONE;
    }

    /**
     * Adds the exons described by the exon count / starts / ends columns to the gene
     * and, for GENEPRED tables that carry a frame column, sets each exon's UTR flag
     * or phase.
     *
     * <p>Exons are numbered 1..exonCount in table order for non-negative strands and
     * exonCount..1 for the negative strand.</p>
     *
     * @param tokens     the column values of the line
     * @param tokenCount the number of valid entries in {@code tokens}
     * @param gene       the feature receiving the exons
     * @param chr        chromosome name assigned to every exon
     * @param strand     strand assigned to every exon
     * @throws NumberFormatException if a coding bound, the exon count or an exon
     *                               coordinate is not an integer
     */
    private void createExons(String[] tokens, int tokenCount, BasicFeature gene, String chr,
            Strand strand)
            throws NumberFormatException {

        int cdStart = Integer.parseInt(tokens[cdStartColumn]);
        int cdEnd = Integer.parseInt(tokens[cdEndColumn]);

        int exonCount = Integer.parseInt(tokens[exonCountColumn]);
        // Both buffers are sized by the declared exon count, so they always have
        // the same length. ParsingUtils.split presumably fills them with the comma
        // separated values -- TODO confirm against ParsingUtils.
        String[] startsBuffer = new String[exonCount];
        String[] endsBuffer = new String[exonCount];
        ParsingUtils.split(tokens[startsBufferColumn], startsBuffer, ',');
        ParsingUtils.split(tokens[endsBufferColumn], endsBuffer, ',');

        boolean negative = (strand == Strand.NEGATIVE);
        int exonNumber = negative ? exonCount : 1;
        for (int i = 0; i < exonCount; i++) {
            int exonStart = Integer.parseInt(startsBuffer[i]);
            int exonEnd = Integer.parseInt(endsBuffer[i]);
            Exon exon = new Exon(chr, exonStart, exonEnd, strand);
            exon.setCodingStart(cdStart);
            exon.setCodingEnd(cdEnd);
            exon.setNumber(exonNumber);
            gene.addExon(exon);
            exonNumber += negative ? -1 : 1;
        }

        if (type == Type.GENEPRED && tokenCount > GENEPRED_FRAME_COLUMN) {
            try {
                List<Exon> exons = gene.getExons();
                String[] frameBuffer = new String[exonCount];
                ParsingUtils.split(tokens[GENEPRED_FRAME_COLUMN], frameBuffer, ',');
                for (int i = 0; i < frameBuffer.length; i++) {
                    int exonFrame = Integer.parseInt(frameBuffer[i].trim());
                    if (exonFrame == -1) {
                        // A frame of -1 marks the exon as UTR.
                        exons.get(i).setUTR(true);
                    } else {
                        // Frame 0 maps to phase 0; otherwise the phase is the
                        // complement of the frame within a codon.
                        int phase = (exonFrame == 0) ? 0 : CODON_LENGTH - exonFrame;
                        exons.get(i).setPhase(phase);
                    }
                }
            } catch (Exception ignored) {

                // Ignore -- not getting the reading frame is not the end of the world.
            }

        }
    }
}
