/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2009) by the Broad Institute/Massachusetts Institute
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
*/

package org.broad.igv.data.rnai;

import cern.colt.map.OpenIntObjectHashMap;
import org.broad.igv.util.ResourceLocator;
import org.broad.igv.feature.*;
import org.broad.igv.util.AsciiLineReader;
import org.broad.igv.ui.MiscStuff;
import org.broad.igv.ui.IGVMainFrame;
import org.broad.igv.track.TrackManager;
import org.apache.log4j.Logger;

import javax.swing.*;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.net.URL;
import java.net.URLConnection;

import org.broad.igv.ui.IGVModel;

/**
 * Parses a tab-delimited RNAi score file into one {@link RNAIDataSource} per
 * sample column.
 * <p>
 * Each data row is keyed by a probe (hairpin) id in column 0.  The probe id is
 * translated to a gene identifier through the RNAi probe map, the gene is looked
 * up in {@link FeatureDB}, and every numeric cell of the row is recorded both as
 * an individual hairpin score in {@link RNAIHairpinCache} and folded into a
 * per-sample, per-gene score (the minimum of the values seen for that gene).
 * <p>
 * User: nazaire
 * Date: Feb 25, 2009
 */
public class RNAIGCTDatasetParser {

    private static final Logger log = Logger.getLogger(RNAIGCTDatasetParser.class);

    /** Gzipped probe mapping file that is downloaded when no RNAi probe map is loaded yet. */
    private static final String RNAI_MAPPING_FILE = "http://www.broadinstitute.org/igv/resources/probes/rnai/RNAI_probe_mapping.txt.gz";

    /** Index of the first column holding sample values; column 0 is the probe id and column 1 is ignored. */
    private static final int DATA_START_COLUMN = 2;

    /** Size of the reusable token buffer handed to {@code ParsingUtils.split}. */
    private static final int TOKEN_BUFFER_SIZE = 1000;

    private final ResourceLocator dataFileLocator;
    GeneManager geneManager = null;

    /**
     * Creates a parser for the given data file and fetches the gene manager for
     * the genome id reported by the current view context.
     *
     * @param gctFile locator of the file to parse
     */
    public RNAIGCTDatasetParser(ResourceLocator gctFile) {
        this.dataFileLocator = gctFile;

        String genome = IGVModel.getInstance().getViewContext().getGenomeId();
        this.geneManager = GeneManager.getGeneManager(genome);
    }

    /**
     * Reads the data file and builds one data source per sample column.
     * <p>
     * The first two rows of the file are skipped unread and the third row supplies
     * the column headings (sample names).  Rows whose probe id has no mapping, or
     * whose mapped identifier is unknown to {@link FeatureDB}, are logged and
     * skipped.  Blank or missing cells are recorded as {@code NaN}; cells that are
     * present but not parseable as a float are skipped without being recorded.
     *
     * @return the data sources, one per sample that received at least one score,
     *         or {@code null} if the probe mapping file could not be loaded
     * @throws RuntimeException if an I/O error occurs while reading the data file
     *                          or opening the probe mapping file; the original
     *                          {@link IOException} is attached as the cause
     */
    public Collection<RNAIDataSource> parse() {
        AsciiLineReader reader = null;
        try {
            // Reusable buffer for the custom string split utility, which fills a
            // caller-supplied array and returns the number of tokens written.
            String[] tokens = new String[TOKEN_BUFFER_SIZE];
            reader = ParsingUtils.openAsciiReader(dataFileLocator);

            // Skip header rows
            reader.readLine();
            reader.readLine();

            String headerLine = reader.readLine();
            if (headerLine == null) {
                // Routed through the IOException handler below so the caller gets the
                // usual "Error loading RNAi file" failure instead of a bare NPE.
                throw new IOException("Missing column heading row");
            }

            // Parse column headings
            int nTokens = ParsingUtils.split(headerLine, tokens, '\t');
            // Clamp so a heading row without any sample columns yields zero columns
            // rather than a negative array size.
            int nColumns = Math.max(0, nTokens - DATA_START_COLUMN);
            String[] columnHeadings = new String[nColumns];
            for (int i = 0; i < nColumns; i++) {
                columnHeadings[i] = tokens[DATA_START_COLUMN + i].replace('\"', ' ').trim();
            }

            // Only download the mapping file when no mapping is available yet.
            OpenIntObjectHashMap rnaiProbeMap = ProbeToGeneMap.getInstance().getRNAiProbeMap();
            if (rnaiProbeMap == null || rnaiProbeMap.isEmpty()) {
                rnaiProbeMap = loadProbeMap();
                if (rnaiProbeMap == null) {
                    return null;
                }
            }

            HashMap<String, HashMap<String, Float>> sampleGeneScoreMap =
                new HashMap<String, HashMap<String, Float>>();

            String nextLine;
            while ((nextLine = reader.readLine()) != null) {
                nTokens = ParsingUtils.split(nextLine, tokens, '\t');
                if (nTokens < 1) {
                    // No probe id on this row; tokens[0] would be stale or null.
                    continue;
                }
                String probeId = new String(tokens[0]);

                // The probe map is keyed by the hash code of the probe id.
                String[] identifiers = (String[]) rnaiProbeMap.get(probeId.hashCode());
                if (identifiers == null || identifiers.length == 0) {
                    log.error("Could not find mapping for: " + probeId);
                    continue;
                }
                String identifier = identifiers[0];

                Feature gene = FeatureDB.getFeature(identifier.toUpperCase());
                if (gene == null) {
                    log.debug("Unknown identifier: " + identifier);
                    continue;
                }
                String geneName = gene.getName();

                for (int i = 0; i < nColumns; i++) {
                    int dataIndex = DATA_START_COLUMN + i;

                    float value;
                    if ((dataIndex >= nTokens) || (tokens[dataIndex].length() == 0)) {
                        // If we are out of value tokens, or the cell is blank, assign NaN to the cell.
                        value = Float.NaN;
                    } else {
                        try {
                            value = Float.parseFloat(tokens[dataIndex]);
                        } catch (NumberFormatException numberFormatException) {
                            // This is an expected condition (non-numeric cell).  The cell is
                            // skipped: no hairpin is recorded and the gene score is untouched.
                            continue;
                        }
                    }

                    String sample = columnHeadings[i];
                    RNAIHairpinCache.getInstance().addHairpinScore(sample, geneName,
                        new RNAIHairpinValue(probeId, value));

                    HashMap<String, Float> geneScoreMap = sampleGeneScoreMap.get(sample);
                    if (geneScoreMap == null) {
                        geneScoreMap = new HashMap<String, Float>();
                        sampleGeneScoreMap.put(sample, geneScoreMap);
                    }

                    // The gene score is the minimum over all values seen for the gene.
                    // NOTE(review): Math.min returns NaN if either argument is NaN, so one
                    // blank cell makes the gene score NaN -- confirm this is intended.
                    Float previous = geneScoreMap.get(geneName);
                    float geneScore = (previous == null)
                        ? value
                        : Math.min(value, previous.floatValue());
                    geneScoreMap.put(geneName, Float.valueOf(geneScore));
                }
            }

            return computeGeneScores(sampleGeneScoreMap);

        } catch (IOException ex) {
            log.error("Error parsing RNAi file", ex);
            throw new RuntimeException("Error loading RNAi file: " + dataFileLocator.getPath() +
                " (" + ex.getMessage() + ")", ex);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Downloads the gzipped probe mapping file and loads it into a new map.
     * A failure while reading the mapping is reported to the user in a dialog
     * and logged.
     *
     * @return the populated probe map, or {@code null} if loading the mapping failed
     * @throws IOException if the connection or the gzip stream cannot be opened
     */
    private OpenIntObjectHashMap loadProbeMap() throws IOException {
        URLConnection connection = new URL(RNAI_MAPPING_FILE).openConnection();
        InputStream probeMappingStream = new GZIPInputStream(connection.getInputStream());

        AsciiLineReader br = null;
        try {
            br = new AsciiLineReader(probeMappingStream);
            OpenIntObjectHashMap probeMap = new OpenIntObjectHashMap();
            ProbeToGeneMap.getInstance().loadMapping(br, probeMap);
            return probeMap;
        } catch (Exception e) {
            JOptionPane.showMessageDialog(IGVMainFrame.getInstance(),
                "<html>Could not load the probe mapping file " + RNAI_MAPPING_FILE + "<br>");
            log.error("Could not load the probe mapping file " + RNAI_MAPPING_FILE, e);
            return null;
        } finally {
            // Close on every path, not only after a successful load.
            if (br != null) {
                br.close();
            }
        }
    }

    /**
     * Converts the accumulated per-sample gene scores into data sources.  Each
     * gene score is paired with the number of hairpin scores cached for that
     * sample and gene (0 when the cache returns nothing).
     *
     * @param sampleGeneScoreMap gene name to score, grouped by sample name
     * @return one data source per sample in the map
     */
    private List<RNAIDataSource> computeGeneScores(HashMap<String, HashMap<String, Float>> sampleGeneScoreMap) {
        List<RNAIDataSource> dataSources = new ArrayList<RNAIDataSource>(sampleGeneScoreMap.size());

        for (Map.Entry<String, HashMap<String, Float>> sampleEntry : sampleGeneScoreMap.entrySet()) {
            String sample = sampleEntry.getKey();
            RNAIDataSource ds = new RNAIDataSource(sample, "");

            for (Map.Entry<String, Float> geneEntry : sampleEntry.getValue().entrySet()) {
                String gene = geneEntry.getKey();
                Collection<?> hairpins = RNAIHairpinCache.getInstance().getHairpinScores(sample, gene);
                int numHairpins = (hairpins == null) ? 0 : hairpins.size();

                ds.addGeneScore(new RNAIGeneScore(sample,
                    FeatureDB.getFeature(gene), geneEntry.getValue().floatValue(), numHairpins));
            }
            dataSources.add(ds);
        }
        return dataSources;
    }
}
