/*
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
 * This is copyright (2007-2009) by the Broad Institute/Massachusetts Institute 
 * of Technology.  It is licensed to You under the Gnu Public License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *    http://www.opensource.org/licenses/gpl-2.0.php
 *
 * This software is supplied without any warranty or guaranteed support
 * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
 * use, misuse, or functionality.
*/

/*
 * BEDFeatureProcessor.java
 *
 * Created on Sep 26, 2007, 4:41:31 PM
 *
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.broad.igv.preprocess.old;

import cern.colt.list.DoubleArrayList;
import cern.jet.stat.quantile.DoubleQuantileFinder;
import cern.jet.stat.quantile.QuantileFinderFactory;
import org.broad.igv.feature.Chromosome;
import org.broad.igv.feature.Genome;
import org.broad.igv.feature.Feature;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.broad.igv.feature.BEDFileParser;
import org.broad.igv.feature.FeatureUtils;
import org.broad.igv.feature.GenomeManager;
import org.broad.igv.util.ResourceLocator;
import org.broad.igv.h5.HDF5LocalWriter;
import org.broad.igv.h5.HDFWriter;


/**
 * Writes a list of features to an HDF5 file through {@link HDFWriter}.
 *
 * For every chromosome that has features the output contains a group holding
 * one sub-group per zoom level ("z0", "z1", ...) with binned feature counts,
 * plus a "raw" sub-group with the per-feature start/end/name (and optional
 * score) arrays and a coarse positional index into them.
 *
 * All state is static; the class is used through its static methods only.
 *
 * @author jrobinso
 */
public class BEDFeatureProcessor {

    /** Upper bound (exclusive) on the zoom levels computed per chromosome. */
    static int zoomMax = 3;

    /** Number of bins per tile; at zoom level z there are 2^z tiles. */
    static int tileWidthPixels = 700;

    /** Writer used for all HDF5 output produced by this class. */
    static HDFWriter writer = new HDF5LocalWriter();

    /**
     * Command line entry point.  Example usage:
     * <pre>
     *   BEDFeatureProcessor
     *     -inputFile data/FeatureTracks/071109_mm8_TUset_CompleteGenome_v2.bed
     *     -outputFile data/071109_mm8_TUset_CompleteGenome_v2.h5
     *     -genome mm8
     * </pre>
     * Prints a usage message and exits with status -1 when any of
     * {@code -inputFile}, {@code -outputFile} or {@code -genome} is missing.
     * When {@code -name} is not given the track name defaults to the input
     * file's simple name.
     *
     * @param args command line arguments, see {@link #parseArgs(String[])}
     */
    public static void main(String[] args) {

        LinkedHashMap<String, String> argsMap = parseArgs(args);
        String inputFile = argsMap.get("-inputFile");
        String outputFile = argsMap.get("-outputFile");
        String genomeName = argsMap.get("-genome");
        String name = argsMap.get("-name");

        if (inputFile == null || outputFile == null || genomeName == null) {
            System.out.println("Usage BEDFeatureProcessor -inputFile <file> -outputFile <file> -genome <mm8> -name <optional>");
            System.exit(-1);
        }

        File featureFile = new File(inputFile);
        if (name == null) {
            name = featureFile.getName();
        }

        // NOTE(review): nothing is processed after argument parsing -- the
        // genome is never loaded, the input file is never parsed and
        // processFeatures(...) is never called.  Presumably that code was
        // removed at some point; confirm before relying on this entry point.
    }

    /**
     * Writes all features to a new HDF5 file.
     *
     * The root group receives the attributes "name", "type" (always
     * "FEATURE") and "has.data" (always 0).  Features are split by chromosome
     * and each chromosome known to the genome gets its own group under
     * "features".  Chromosomes unknown to the genome are reported on stdout
     * and skipped; chromosomes with no features are skipped silently.
     *
     * Every group and the file itself are closed even when a write fails.
     *
     * @param filename    path of the HDF5 file to create
     * @param genome      genome used to look up chromosome lengths
     * @param allFeatures features for all chromosomes
     * @param name        track name recorded in the file
     */
    public static void processFeatures(String filename, Genome genome, List<Feature> allFeatures, String name) {

        int file = writer.createFile(filename);
        try {
            int root = writer.openGroup(file, "/");
            try {
                // Record type
                writer.writeAttribute(root, "name", name);
                writer.writeAttribute(root, "type", "FEATURE");
                writer.writeAttribute(root, "has.data", 0);

                // Create a group for this feature track.  Might store multiple
                // tracks in one HDF5 file in the future.
                int featureGroup = writer.createGroup(root, "features");
                try {
                    Map<String, List<Feature>> featureMap = FeatureUtils.divideByChromosome(allFeatures);

                    for (Map.Entry<String, List<Feature>> entry : featureMap.entrySet()) {
                        String chr = entry.getKey();

                        Chromosome chromosome = genome.getChromosome(chr);
                        if (chromosome == null) {
                            System.out.println("No chromosome: " + chr);
                            continue;
                        }

                        List<Feature> features = entry.getValue();
                        if (features == null || features.isEmpty()) {
                            // Nothing to write; also protects features.get(0) below.
                            continue;
                        }

                        processChromosome(featureGroup, chr, (int) chromosome.getLength(), features);
                    }
                } finally {
                    writer.closeGroup(featureGroup);
                }
            } finally {
                writer.closeGroup(root);
            }
        } finally {
            writer.closeFile(file);
        }
    }

    /**
     * Writes the group for a single chromosome: its "length" and
     * "zoom.levels" attributes, the zoom level groups and the raw data group.
     * The feature list is sorted in place before being written.
     *
     * @param featureGroup id of the parent "features" group
     * @param chr          chromosome name, used as the group name
     * @param chrLength    chromosome length
     * @param features     non-empty list of features on this chromosome
     */
    private static void processChromosome(int featureGroup, String chr, int chrLength, List<Feature> features) {

        FeatureUtils.sortFeatureList(features);

        // TODO find out where "chr"  is getting stripped off chr string
        int featureChrGroup = writer.createGroup(featureGroup, chr);
        try {
            writer.writeAttribute(featureChrGroup, "length", chrLength);

            int maxZoom = processZoomLevels(featureChrGroup, chrLength, features);
            writer.writeAttribute(featureChrGroup, "zoom.levels", maxZoom);

            // The first feature decides whether optional columns are recorded.
            Feature first = features.get(0);
            boolean hasStrand = first.hasStrand();
            boolean hasScore = first.hasScore();
            processRawData(featureChrGroup, chrLength, features, maxZoom, hasScore, hasStrand);
        } finally {
            writer.closeGroup(featureChrGroup);
        }
    }

    /**
     * Writes the "raw" group of a chromosome: the datasets "start", "end",
     * "name", optionally "score", and the positional index produced by
     * {@link #recordRawIndex(int, int, List)}.
     *
     * A feature without a name is stored with an empty name instead of
     * failing with a NullPointerException.
     *
     * @param chrGroup  id of the chromosome group
     * @param chrLength chromosome length
     * @param features  features on this chromosome, in the order to be written
     * @param maxZoom   currently unused
     * @param hasScore  whether a "score" dataset is written
     * @param hasStrand currently unused; strand is not recorded yet
     */
    private static void processRawData(int chrGroup, int chrLength,
            List<Feature> features, int maxZoom,
            boolean hasScore, boolean hasStrand) {

        int rawGroup = writer.createGroup(chrGroup, "raw");
        try {
            int nFeatures = features.size();
            int[] start = new int[nFeatures];
            int[] end = new int[nFeatures];
            String[] name = new String[nFeatures];
            float[] score = hasScore ? new float[nFeatures] : null;

            // Widest name seen; passed to the writer to size the string dataset.
            int maxStringWidth = 0;
            for (int i = 0; i < nFeatures; i++) {
                Feature f = features.get(i);
                start[i] = (int) f.getStart();
                end[i] = (int) f.getEnd();

                String featureName = f.getName();
                name[i] = (featureName == null) ? "" : featureName;
                maxStringWidth = Math.max(maxStringWidth, name[i].length());

                if (score != null) {
                    score[i] = f.getScore();
                }
            }

            writer.createAndWriteVectorDataset(rawGroup, "start", start);
            writer.createAndWriteVectorDataset(rawGroup, "end", end);
            writer.createAndWriteStringDataset(rawGroup, "name", name, maxStringWidth);
            if (score != null) {
                writer.createAndWriteVectorDataset(rawGroup, "score", score);
            }
            // TODO record a "strand" dataset when hasStrand is true.

            recordRawIndex(rawGroup, chrLength, features);
        } finally {
            writer.closeGroup(rawGroup);
        }
    }

    /**
     * TODO  THIS IS NEARLY AN EXACT COPY FROM SnpSolexaProcessor.  REFACTOR TO
     * A COMMON CLASS.  THE ONLY DIFFERENCE IS THE REPLACEMENT OF locations WITH
     * features.
     *
     * Writes a coarse positional index for the raw data.  The chromosome is
     * divided into equal chunks of "index.span" base pairs, and entry n of
     * the "index" dataset is the index of the first feature whose start is at
     * or beyond n * span, or the index of the last feature when no such
     * feature exists.  With no features every entry is -1.
     *
     * @param groupId   id of the group receiving the attribute and dataset
     * @param chrLength chromosome length
     * @param features  features sorted by start position
     */
    private static void recordRawIndex(int groupId, int chrLength, List<Feature> features) {

        int nFeatures = features.size();

        // Chunk size in base pairs.  Assume features are evenly distributed and
        // size chunks to hold approximately 1000 features, but no larger than
        // 1/4 of the chromosome length.
        double chunkSize = chrLength / 4;
        if (nFeatures > 0) {
            double featuresPerBp = ((double) nFeatures) / chrLength;
            chunkSize = Math.min(chunkSize, 1000 / featuresPerBp);
        }
        // Very short (or zero length) chromosomes would otherwise give a chunk
        // size of 0, an infinite chunk count and a negative array size.  The
        // negated comparison also catches NaN.
        if (!(chunkSize >= 1)) {
            chunkSize = 1;
        }
        int nChunks = (int) (chrLength / chunkSize) + 1;

        int[] indices = new int[nChunks];

        int lastIndex = nFeatures - 1;
        int i = Math.min(0, lastIndex);   // 0 normally, -1 when there are no features
        for (int n = 0; n < nChunks; n++) {
            int boundary = (int) (n * chunkSize);

            // Skip until we have crossed the boundary, never moving past the
            // last feature.
            while (i < lastIndex && features.get(i).getStart() < boundary) {
                i++;
            }
            indices[n] = i;
        }

        writer.writeAttribute(groupId, "index.span", chunkSize);
        writer.createAndWriteVectorDataset(groupId, "index", indices);
    }

    /**
     * Note:  method body is a nearly exact copy of the expression processor.
     *
     * Computes and writes binned feature counts for zoom levels 0, 1, ... up
     * to (but excluding) {@link #zoomMax}.  Each level is written to a group
     * named "z" + level.  Processing stops early when a level has no occupied
     * bins (that level is not written) or after writing a level whose mean
     * count per occupied bin is below 3.
     *
     * @param featureChrGroup id of the chromosome group
     * @param chrLength       chromosome length
     * @param features        features on this chromosome
     * @return the number of zoom levels written
     */
    private static int processZoomLevels(int featureChrGroup, int chrLength, List<Feature> features) {

        int z = 0;

        //TODO -- get smarter about computing max zoom level.  Maybe look at feature counts.
        while (z < zoomMax) {

            BinnedData binInfo = computeBinnedData(chrLength, z, features);
            int[] locations = binInfo.getLocations();
            if (locations.length == 0) {
                System.out.println("Zero lengh locations: " + z);
                return z;
            }

            String zoomName = "z" + binInfo.getZoomLevel();
            int featureZoomGroup = writer.createGroup(featureChrGroup, zoomName);
            try {
                // TODO nTiles is also calculated in computeBinnedData.  do it in one place
                int nTiles = 1 << z;
                // Floating point division: integer division would silently
                // drop the fractional part of the tile width.
                double tileWidth = ((double) chrLength) / nTiles;

                writer.writeAttribute(featureZoomGroup, "tile.width", tileWidth);
                writer.writeAttribute(featureZoomGroup, "bin.size", binInfo.getBinSize());
                writer.writeAttribute(featureZoomGroup, "mean.count", binInfo.getMeanCount());
                writer.writeAttribute(featureZoomGroup, "median.count", binInfo.getMedianCount());
                writer.writeAttribute(featureZoomGroup, "max.count", binInfo.getMaxCount());

                // Record bin start locations
                writer.createAndWriteVectorDataset(featureZoomGroup, "start", locations);

                // Record boundary indices (bin number) for each tile
                int[] tileBoundaries = binInfo.getTileBoundaries();
                writer.createAndWriteVectorDataset(featureZoomGroup, "tile.boundary", tileBoundaries);

                // Record the feature count for each bin
                float[] counts = binInfo.getCounts();
                writer.createAndWriteVectorDataset(featureZoomGroup, "count", counts);
            } finally {
                writer.closeGroup(featureZoomGroup);
            }

            z++;

            // Bins are already sparse at this level; finer levels add nothing.
            if (binInfo.getMeanCount() < 3) {
                return z;
            }
        }

        return z;
    }

    /**
     * Allocates features to bins for a zoom level.  A single feature can be
     * spread across multiple bins, and a single bin can have contributions
     * from multiple features.
     *
     * At zoom level z the chromosome is divided into 2^z tiles of
     * {@link #tileWidthPixels} bins each.  Only occupied bins are kept.  The
     * returned object also carries, for every tile, a boundary index into the
     * occupied-bin list, and the mean, maximum, 10th percentile, median and
     * 90th percentile of the per-bin feature counts.
     *
     * @param chrLength chromosome length
     * @param zoomLevel zoom level, 0 being the whole chromosome in one tile
     * @param features  features on this chromosome
     * @return the binned data for the zoom level
     */
    public static BinnedData computeBinnedData(int chrLength, int zoomLevel, List<Feature> features) {

        int nTiles = 1 << zoomLevel;
        int nBins = nTiles * tileWidthPixels;
        double binSize = ((double) chrLength) / nBins;

        List<FeatureBin> occupiedBins =
                (new FeatureBinCalculator()).computeFeatureBins(features, nBins, binSize, 0, chrLength);

        // Find tile breaks.  For every tile but the last the boundary is the
        // index of the first occupied bin starting at or beyond the tile end.
        int[] tileBoundaries = new int[nTiles];
        int binNumber = 0;
        // Floating point division, consistent with binSize above.
        double tileLength = ((double) chrLength) / nTiles;
        for (int tileNumber = 0; tileNumber < nTiles - 1; tileNumber++) {
            // Using a linear search, might need to use a faster scheme.
            double tileEnd = (tileNumber + 1) * tileLength;
            while (binNumber < occupiedBins.size() &&
                    occupiedBins.get(binNumber).getStart() < tileEnd) {
                binNumber++;
            }
            tileBoundaries[tileNumber] = binNumber;
        }
        // Boundary for last tile number is end.
        // NOTE(review): this is the index of the last bin (inclusive, and -1
        // when there are no occupied bins) whereas the boundaries above are
        // exclusive -- confirm against the reader before changing.
        tileBoundaries[nTiles - 1] = occupiedBins.size() - 1;

        BinnedData binInfo = new BinnedData(zoomLevel, binSize, occupiedBins, tileBoundaries);

        // Compute the mean, max, 10th percentile, median and 90th percentile
        // of the feature counts of the occupied bins.
        float mean = 0f;
        float max = 0f;
        DoubleArrayList percentiles = new DoubleArrayList(3);
        percentiles.add(0.1);
        percentiles.add(0.5);
        percentiles.add(0.90);
        DoubleQuantileFinder qf = QuantileFinderFactory.newDoubleQuantileFinder(false, Long.MAX_VALUE,
                0.001, 0.0001, percentiles.size(), null);
        for (Bin bin : occupiedBins) {
            int count = bin.getFeatureCount();
            mean += count;
            max = Math.max(max, count);
            qf.add(count);
        }
        DoubleArrayList quantiles = qf.quantileElements(percentiles);
        // The mean is NaN when there are no occupied bins.
        binInfo.setMeanCount(mean / occupiedBins.size());
        binInfo.setMaxCount(max);
        binInfo.setPercentile10(quantiles.get(0));
        binInfo.setMedianCount(quantiles.get(1));
        binInfo.setPercentile90(quantiles.get(2));

        return binInfo;
    }

    /**
     * TODO move to utility class
     *
     * Parses command line arguments into an insertion-ordered map.  An
     * argument starting with "-" is a key whose value is the following
     * argument, or the empty string when it is the last argument.  Any other
     * argument is stored with itself as both key and value.  Every key and
     * positional argument is echoed to stdout as it is read.
     *
     * NOTE(review): a flag directly followed by another flag consumes that
     * flag as its value -- confirm whether valueless flags need support.
     *
     * @param args raw command line arguments
     * @return map from argument key to value, in command line order
     */
    private static LinkedHashMap<String, String> parseArgs(String[] args) {

        LinkedHashMap<String, String> argMap = new LinkedHashMap<String, String>();
        for (int i = 0; i < args.length; i++) {
            String key = args[i];
            System.out.println(key);

            if (!key.startsWith("-")) {
                argMap.put(key, key);
            } else if (i < args.length - 1) {
                i++;
                argMap.put(key, args[i]);
            } else {
                argMap.put(key, "");
            }
        }
        return argMap;
    }
}
