/*
 * Copyright (c) 2007-2010 by The Broad Institute, Inc. and the Massachusetts Institute of Technology.
 * All Rights Reserved.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which
 * is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 *
 * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF
 * ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT
 * OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE.  IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR
 * RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF
 * ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC
 * DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL
 * BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE
 * FOREGOING.
 */

package org.broad.tribble.readers;

import net.sf.samtools.util.BlockCompressedInputStream;
import org.broad.tribble.util.LineReader;

import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Set;


/* Contact: Heng Li <hengli@broadinstitute.org> */


public class TabixReader implements QueryReader, LineReader {
    /** Path or URL of the data file; the index is looked up under this name plus ".tbi". */
    private String filename;
    /** Stream over the data file; used by readLine() and by every TabixLineReader. */
    private BlockCompressedInputStream blockCompressedInputStream;

    // Header values of the index, filled in by readIndex().
    /** Low 16 bits select the line flavour (0 generic, 1 SAM, 2 VCF); bit 0x10000 changes how the begin column is turned into an interval. */
    private int format;
    /** 1-based column holding the sequence name. */
    private int seqCol;
    /** 1-based column holding the begin coordinate. */
    private int begCol;
    /** 1-based column holding the end coordinate (consulted for the generic flavour only). */
    private int endCol;
    /** Lines whose first character equals this value are skipped by queries. */
    private int metaChar;
    /** Read from the index header; not used anywhere else in this class. */
    private int nSkip;
    /** Sequence names in index order; the array position is the sequence id ("tid"). */
    private String[] sequences;

    /** Sequence name to sequence id. */
    private HashMap<String, Integer> chr2tid;

    /** Size of the scratch array that receives candidate bins in query(). */
    private static final int MAX_BIN = 37450;
    /** Not referenced in this class. */
    private static final int TAD_MIN_CHUNK_GAP = 32768;
    /** A coordinate shifted right by this amount gives its slot in the linear index. */
    private static final int TAD_LIDX_SHIFT = 14;

    /**
     * A pair of 64-bit values. In this file {@code u} and {@code v} hold the begin and end
     * file offsets of a chunk; pairs are ordered by {@code u} read as an unsigned number.
     */
    private class TPair64 implements Comparable<TPair64> {
        long u, v;

        public TPair64(final long _u, final long _v) {
            this.u = _u;
            this.v = _v;
        }

        /** Copy constructor. */
        public TPair64(final TPair64 p) {
            this(p.u, p.v);
        }

        public int compareTo(final TPair64 p) {
            if (u == p.u) {
                return 0;
            }
            // XOR-ing both sign tests into the signed "<" yields the unsigned "<".
            final boolean unsignedLess = (u < p.u) ^ (u < 0) ^ (p.u < 0);
            return unsignedLess ? -1 : 1;
        }
    }

    /** Index data of a single sequence, as loaded by readIndex(). */
    private class TIndex {
        HashMap<Integer, TPair64[]> b; // binning index: bin number -> chunks stored for that bin
        long[] l; // linear index: one offset per slot, addressed by (coordinate >> TAD_LIDX_SHIFT)
    }


    /** One index per sequence, addressed by sequence id; allocated and filled by readIndex(). */
    private TIndex[] mIndex;

    /** Interval extracted from one data line by getIntv(); all fields start at 0. */
    private class TIntv {
        /** Sequence id, or -1 when the sequence name is not in the index. */
        int tid;
        /** Begin coordinate of the interval. */
        int beg;
        /** End coordinate of the interval. */
        int end;
    }


    /**
     * Unsigned 64-bit "less than".
     *
     * @return true when {@code u}, read as an unsigned value, is smaller than {@code v}
     */
    private static boolean less64(final long u, final long v) {
        final boolean signedLess = u < v;
        final boolean signsDiffer = (u < 0) != (v < 0);
        // When exactly one operand has its top bit set, the signed answer is the wrong way round.
        return signedLess != signsDiffer;
    }

    /**
     * Opens the data file and loads its index from the same location plus ".tbi".
     *
     * @param fn File name of the data file; names starting with "http:", "https:" or "ftp:"
     *           are opened as URLs, anything else as a local file
     * @throws IOException if the data file or its index cannot be opened or read
     */
    public TabixReader(final String fn) throws IOException {
        filename = fn;
        // Recognise the same remote schemes as readIndex() and isTabix().
        if (fn.startsWith("http:") || fn.startsWith("https:") || fn.startsWith("ftp:")) {
            blockCompressedInputStream = new BlockCompressedInputStream(new URL(fn));
        } else {
            blockCompressedInputStream = new BlockCompressedInputStream(new File(fn));
        }
        boolean indexLoaded = false;
        try {
            readIndex();
            indexLoaded = true;
        } finally {
            if (!indexLoaded) {
                // Construction failed, so nobody will ever call close(): release the data stream here.
                try {
                    blockCompressedInputStream.close();
                } catch (IOException ignored) {
                    // the failure from readIndex() is the one worth reporting
                }
            }
        }
    }

    /**
     * Returns the names of all sequences listed in the index.
     *
     * @return a read-only view of the sequence names; read-only so that callers cannot
     *         remove entries from the internal name lookup
     */
    public Set<String> getSequenceNames() {
        return Collections.unmodifiableSet(chr2tid.keySet());
    }

    /**
     * Lists the bins of the binning index that may hold features overlapping {@code [beg, _end)}.
     * <p>
     * The scheme has six levels covering 2^29 positions; level sizes are 2^29, 2^26, 2^23,
     * 2^20, 2^17 and 2^14 and their first bin numbers are 0, 1, 9, 73, 585 and 4681.
     * The region is clamped to {@code [0, 2^29]} first. Without the clamp an open-ended
     * query (end = 0x7fffffff) produced bin numbers that belong to other levels and ran out
     * of room in {@code bins} before the leaf level was complete.
     *
     * @param beg  region begin (0-based, inclusive); negative values are treated as 0
     * @param _end region end (exclusive); values above 2^29 are treated as 2^29
     * @param bins receives the bin numbers
     * @return number of entries written to {@code bins}
     */
    private static int reg2bins(final int beg, final int _end, final int[] bins) {
        final int maxCoordinate = 1 << 29;
        final int from = beg < 0 ? 0 : beg;
        int end = _end > maxCoordinate ? maxCoordinate : _end;
        int i = 0, k;
        if (bins.length == 0) return 0;
        --end; // last position actually covered
        bins[i++] = 0;
        // every level is guarded so that an undersized array can never be overrun
        for (k = 1 + (from >> 26); k <= 1 + (end >> 26) && i < bins.length; ++k) bins[i++] = k;
        for (k = 9 + (from >> 23); k <= 9 + (end >> 23) && i < bins.length; ++k) bins[i++] = k;
        for (k = 73 + (from >> 20); k <= 73 + (end >> 20) && i < bins.length; ++k) bins[i++] = k;
        for (k = 585 + (from >> 17); k <= 585 + (end >> 17) && i < bins.length; ++k) bins[i++] = k;
        for (k = 4681 + (from >> 14); k <= 4681 + (end >> 14) && i < bins.length; ++k) bins[i++] = k;
        return i;
    }

    /**
     * Reads a little-endian 32-bit integer.
     * <p>
     * {@link InputStream#read(byte[], int, int)} may deliver fewer bytes than requested, so
     * the read is repeated until all four bytes have arrived.
     *
     * @param is stream to read from
     * @return the decoded value
     * @throws EOFException if the stream ends before four bytes were read
     * @throws IOException  on any other read failure
     */
    public static int readInt(final InputStream is) throws IOException {
        final byte[] buf = new byte[4];
        int filled = 0;
        while (filled < buf.length) {
            final int n = is.read(buf, filled, buf.length - filled);
            if (n < 0) {
                throw new EOFException("Unexpected end of stream: needed " + buf.length + " bytes, got " + filled);
            }
            filled += n;
        }
        return ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).getInt();
    }

    /**
     * Reads a little-endian 64-bit integer.
     * <p>
     * {@link InputStream#read(byte[], int, int)} may deliver fewer bytes than requested, so
     * the read is repeated until all eight bytes have arrived.
     *
     * @param is stream to read from
     * @return the decoded value
     * @throws EOFException if the stream ends before eight bytes were read
     * @throws IOException  on any other read failure
     */
    public static long readLong(final InputStream is) throws IOException {
        final byte[] buf = new byte[8];
        int filled = 0;
        while (filled < buf.length) {
            final int n = is.read(buf, filled, buf.length - filled);
            if (n < 0) {
                throw new EOFException("Unexpected end of stream: needed " + buf.length + " bytes, got " + filled);
            }
            filled += n;
        }
        return ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).getLong();
    }

    /**
     * Reads bytes up to the next '\n' and returns them as a string, one char per byte.
     * The terminating '\n' is consumed but not returned.
     *
     * @param is stream to read from
     * @return the line, or null when the stream is already at its end. A final line that is
     *         not terminated by '\n' is returned rather than dropped.
     * @throws IOException if reading fails
     */
    public static String readNextLine(final InputStream is) throws IOException {
        final StringBuilder line = new StringBuilder();
        int c;
        while ((c = is.read()) >= 0 && c != '\n') {
            line.append((char) c);
        }
        // End of stream only means "no more lines" when nothing was collected.
        if (c < 0 && line.length() == 0) {
            return null;
        }
        return line.toString();
    }

    /**
     * Reads the Tabix index from a stream and closes the stream afterwards, whether or not
     * reading succeeded.
     * <p>
     * Layout as consumed here (all integers little-endian): the 4-byte magic "TBI\1", the
     * number of sequences, format, the three column numbers, the meta character, the
     * skip count, the length of the name dictionary followed by the NUL-terminated names,
     * and then for every sequence its binning index and its linear index.
     *
     * @param is stream positioned at the start of the index
     * @throws IOException if the stream cannot be read, ends early, does not start with the
     *                     tabix magic, or lists more names than it declares
     */
    public void readIndex(BlockCompressedInputStream is) throws IOException {
        boolean completed = false;
        try {
            byte[] buf = new byte[4];
            readFully(is, buf); // read "TBI\1"
            if (buf[0] != 'T' || buf[1] != 'B' || buf[2] != 'I' || buf[3] != 1) {
                throw new IOException("Not a tabix index: bad magic number");
            }
            sequences = new String[readInt(is)]; // # sequences
            chr2tid = new HashMap<String, Integer>();
            format = readInt(is);   // format
            seqCol = readInt(is);
            begCol = readInt(is);
            endCol = readInt(is);
            metaChar = readInt(is);
            nSkip = readInt(is);
            // read sequence dictionary: names are separated by NUL bytes
            int i, j, k, l = readInt(is);
            buf = new byte[l];
            readFully(is, buf);
            for (i = j = k = 0; i < buf.length; ++i) {
                if (buf[i] == 0) {
                    if (k >= sequences.length) {
                        throw new IOException("Malformed tabix index: more sequence names than the declared "
                                + sequences.length);
                    }
                    String s = new String(buf, j, i - j);
                    chr2tid.put(s, k);
                    sequences[k++] = s;
                    j = i + 1;
                }
            }
            // read the index
            mIndex = new TIndex[sequences.length];
            for (i = 0; i < sequences.length; ++i) {
                // the binning index
                int n_bin = readInt(is);
                mIndex[i] = new TIndex();
                mIndex[i].b = new HashMap<Integer, TPair64[]>();
                for (j = 0; j < n_bin; ++j) {
                    int bin = readInt(is);
                    TPair64[] chunks = new TPair64[readInt(is)];
                    for (k = 0; k < chunks.length; ++k) {
                        long u = readLong(is);
                        long v = readLong(is);
                        chunks[k] = new TPair64(u, v);
                    }
                    mIndex[i].b.put(bin, chunks);
                }
                // the linear index
                mIndex[i].l = new long[readInt(is)];
                for (k = 0; k < mIndex[i].l.length; ++k)
                    mIndex[i].l[k] = readLong(is);
            }
            completed = true;
        } finally {
            if (completed) {
                is.close();
            } else {
                // already failing: do not let a close() error hide the real cause
                try {
                    is.close();
                } catch (IOException ignored) {
                    // intentionally ignored
                }
            }
        }
    }

    /** Fills {@code buf} completely from {@code is}; throws EOFException if the stream ends first. */
    private static void readFully(final InputStream is, final byte[] buf) throws IOException {
        int filled = 0;
        while (filled < buf.length) {
            final int n = is.read(buf, filled, buf.length - filled);
            if (n < 0) {
                throw new EOFException("Unexpected end of tabix index: needed " + buf.length
                        + " bytes, got " + filled);
            }
            filled += n;
        }
    }

    /**
     * Loads the index that sits next to the data file, i.e. at the data file's name with
     * ".tbi" appended. Names starting with "http:", "https:" or "ftp:" are opened as URLs,
     * everything else as a local file.
     *
     * @throws IOException if the index cannot be opened or read
     */
    public void readIndex() throws IOException {
        final boolean remote = filename.startsWith("http:")
                || filename.startsWith("https:")
                || filename.startsWith("ftp:");
        final String indexLocation = filename + ".tbi";
        final BlockCompressedInputStream indexStream = remote
                ? new BlockCompressedInputStream(new URL(indexLocation))
                : new BlockCompressedInputStream(new File(indexLocation));
        readIndex(indexStream);
    }

    /**
     * Reads the next line from the data file at the stream's current position.
     * Delegates to {@link #readNextLine(InputStream)} on the data stream.
     *
     * @return the line as returned by readNextLine, or null when it reports no more lines
     * @throws IOException if reading fails
     */
    public String readLine() throws IOException {
        return readNextLine(blockCompressedInputStream);
    }

    /**
     * Maps a sequence name to its sequence id.
     *
     * @return the id, or -1 when the name is not in the index
     */
    private int chr2tid(final String chr) {
        final Integer tid = chr2tid.get(chr);
        if (tid == null) {
            return -1;
        }
        return tid.intValue();
    }

    /**
     * Parse a region in the format of "chr1", "chr1:100" or "chr1:100-1000".
     * <p>
     * A string that is itself a known sequence name is taken as the whole sequence, so names
     * containing ':' or '-' work when given alone. Otherwise the last ':' separates the name
     * from the coordinates, and only a '-' after that colon is treated as the range
     * separator, so a hyphen inside the name is left alone.
     *
     * @param reg Region string
     * @return An array where the three elements are sequence_id, region_begin (the given
     *         begin minus one, or 0 when absent) and region_end (0x7fffffff when absent).
     *         If the sequence is unknown, sequence_id==-1.
     * @throws NumberFormatException if a coordinate is not a valid integer
     */
    public int[] parseReg(final String reg) {
        final int[] ret = new int[3];
        final int colon = reg.lastIndexOf(':');
        final String chr;
        if (colon < 0 || chr2tid.containsKey(reg)) {
            // no coordinates given: the whole sequence
            chr = reg;
            ret[1] = 0;
            ret[2] = 0x7fffffff;
        } else {
            chr = reg.substring(0, colon);
            final int hyphen = reg.indexOf('-', colon + 1);
            // "chr1:100" has no hyphen: the begin runs to the end of the string
            final String begText = hyphen < 0 ? reg.substring(colon + 1) : reg.substring(colon + 1, hyphen);
            ret[1] = Integer.parseInt(begText) - 1;
            ret[2] = hyphen < 0 ? 0x7fffffff : Integer.parseInt(reg.substring(hyphen + 1));
        }
        ret[0] = chr2tid(chr);
        return ret;
    }

    /**
     * Extracts the sequence id and the interval from one tab-separated data line.
     * <p>
     * Columns are numbered from 1. The begin column sets both ends of the interval; when
     * format bit 0x10000 is set the end is incremented, otherwise the begin is decremented.
     * The end is then refined depending on the flavour in the low 16 bits of the format:
     * generic (0) takes it from the end column, SAM (1) adds the M/D/N lengths of the CIGAR
     * in column 6, VCF (2) adds the longest "D&lt;n&gt;" deletion found in column 5 (at least 1).
     * <p>
     * The last column is processed even though no tab follows it (a trailing '\r' is not
     * counted as part of it); an empty last column is ignored.
     *
     * @param s the data line
     * @return the parsed interval; fields that no column supplied remain 0
     * @throws NumberFormatException if a coordinate or length is not a valid integer
     */
    private TIntv getIntv(final String s) {
        TIntv intv = new TIntv();
        int len = s.length();
        if (len > 0 && s.charAt(len - 1) == '\r') --len; // tolerate CRLF line ends
        int col = 0, end, beg = 0;
        while (beg < len) {
            end = s.indexOf('\t', beg);
            if (end < 0) end = len; // last column: runs to the end of the line
            ++col;
            if (col == seqCol) {
                intv.tid = chr2tid(s.substring(beg, end));
            } else if (col == begCol) {
                intv.beg = intv.end = Integer.parseInt(s.substring(beg, end));
                if ((format & 0x10000) != 0) ++intv.end;
                else --intv.beg;
            } else { // TODO FIXME: SAM/VCF supports are not tested yet
                if ((format & 0xffff) == 0) { // generic
                    if (col == endCol)
                        intv.end = Integer.parseInt(s.substring(beg, end));
                } else if ((format & 0xffff) == 1) { // SAM
                    if (col == 6) { // CIGAR
                        int l = 0, i, j;
                        String cigar = s.substring(beg, end);
                        for (i = j = 0; i < cigar.length(); ++i) {
                            if (cigar.charAt(i) > '9') { // an operator letter ends the current length
                                int op = cigar.charAt(i);
                                if (op == 'M' || op == 'D' || op == 'N')
                                    l += Integer.parseInt(cigar.substring(j, i));
                                j = i + 1; // the next length starts right after this operator
                            }
                        }
                        intv.end = intv.beg + l;
                    }
                } else if ((format & 0xffff) == 2) { // VCF
                    if (col == 5) {
                        String alt = s.substring(beg, end);
                        int i, max = 1;
                        for (i = 0; i < alt.length(); ++i) {
                            if (alt.charAt(i) == 'D') { // deletion
                                int j;
                                for (j = i + 1; j < alt.length() && alt.charAt(j) >= '0' && alt.charAt(j) <= '9'; ++j) ;
                                int l = Integer.parseInt(alt.substring(i + 1, j));
                                if (max < l) max = l;
                                i = j - 1;
                            }
                        }
                        intv.end = intv.beg + max;
                    }
                }
            }
            beg = end + 1;
        }
        return intv;
    }

    /**
     * Reads the lines of one query: walks the given chunks of the data file in order and
     * returns every line whose interval overlaps {@code [beg, end)} on sequence {@code tid}.
     * It reads through the enclosing reader's data stream, so it moves that stream's position.
     */
    public class TabixLineReader implements LineReader {
        private int i, n_seeks;       // index of the current chunk (-1 before the first); number of seeks done
        private int tid, beg, end;    // the queried region
        private TPair64[] off;        // chunks to scan: u = begin offset, v = end offset
        private long curr_off;        // stream position after the last line read; 0 before the first read
        private boolean iseof;        // set once the query is exhausted

        /**
         * @param _tid sequence id of the region
         * @param _beg region begin
         * @param _end region end
         * @param _off chunks to scan, in the order they are to be visited
         */
        public TabixLineReader(final int _tid, final int _beg, final int _end, final TPair64[] _off) {
            i = -1;
            n_seeks = 0;
            curr_off = 0;
            iseof = false;
            off = _off;
            tid = _tid;
            beg = _beg;
            end = _end;
        }

        /**
         * Returns the next overlapping line.
         *
         * @return the line, or null once the chunks are exhausted, the data stream ends, or a
         *         line on another sequence or starting at or beyond the region end is seen
         * @throws IOException if seeking or reading fails
         */
        public String readLine() throws IOException {
            if (iseof) return null;
            for (; ;) {
                if (curr_off == 0 || !less64(curr_off, off[i].v)) { // then jump to the nextLine chunk
                    if (i == off.length - 1) break; // no more chunks
                    if (i >= 0) assert (curr_off == off[i].v); // otherwise bug
                    if (i < 0 || off[i].v != off[i + 1].u) { // not adjacent chunks; then seek
                        blockCompressedInputStream.seek(off[i + 1].u);
                        curr_off = blockCompressedInputStream.getFilePointer();
                        ++n_seeks;
                    }
                    ++i;
                }
                String s;
                if ((s = readNextLine(blockCompressedInputStream)) != null) {
                    TIntv intv;
                    curr_off = blockCompressedInputStream.getFilePointer();
                    // skip empty and meta lines; only the first character matters, so no copy of the line is made
                    if (s.length() == 0 || s.charAt(0) == metaChar) continue;
                    intv = getIntv(s);
                    if (intv.tid != tid || intv.beg >= end) break; // no need to proceed
                    else if (intv.end > beg && intv.beg < end) return s; // overlap; return
                } else break; // end of file
            }
            iseof = true;
            return null;
        }

        /** Closes the enclosing reader's data stream. */
        public void close() throws IOException {
            blockCompressedInputStream.close();
        }
    }

    /**
     * Builds a line reader for a region given by sequence id and coordinates.
     * <p>
     * Steps: list the bins that can overlap the region, gather their chunks, drop chunks that
     * end at or before the linear-index offset of the region begin, sort the rest by start
     * offset, then remove contained chunks, trim overlaps and merge chunks whose boundary
     * falls in the same compressed block (same offset &gt;&gt; 16).
     *
     * @param tid sequence id
     * @param beg region begin; negative values are treated as 0 for the index lookups
     * @param end region end
     * @return a reader over the candidate chunks, or null when {@code tid} is not a valid
     *         sequence id or the index holds no chunk that can overlap the region
     */
    private TabixLineReader query(final int tid, final int beg, final int end) {
        if (tid < 0 || tid >= mIndex.length) {
            return null; // unknown sequence
        }
        TPair64[] off;
        TPair64[] chunks;
        long min_off;
        TIndex idx = mIndex[tid];
        int[] bins = new int[MAX_BIN];
        final int from = Math.max(0, beg); // a negative begin must not become a negative array index
        int i, l, n_off, n_bins = reg2bins(from, end, bins);
        final int linearSlot = from >> TAD_LIDX_SHIFT;
        min_off = (linearSlot >= idx.l.length) ? 0 : idx.l[linearSlot];
        for (i = n_off = 0; i < n_bins; ++i) {
            if ((chunks = idx.b.get(bins[i])) != null)
                n_off += chunks.length;
        }
        if (n_off == 0) {
            return null;
        }

        off = new TPair64[n_off];
        for (i = n_off = 0; i < n_bins; ++i)
            if ((chunks = idx.b.get(bins[i])) != null)
                for (int j = 0; j < chunks.length; ++j)
                    if (less64(min_off, chunks[j].v))
                        off[n_off++] = new TPair64(chunks[j]);
        if (n_off == 0) {
            // every chunk ended before min_off; the code below assumes at least one entry
            return null;
        }
        Arrays.sort(off, 0, n_off);
        // resolve completely contained adjacent blocks
        for (i = 1, l = 0; i < n_off; ++i) {
            if (less64(off[l].v, off[i].v)) {
                ++l;
                off[l].u = off[i].u;
                off[l].v = off[i].v;
            }
        }

        n_off = l + 1;
        // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
        for (i = 1; i < n_off; ++i)
            if (!less64(off[i - 1].v, off[i].u)) off[i - 1].v = off[i].u;
        // merge adjacent blocks
        for (i = 1, l = 0; i < n_off; ++i) {
            if (off[l].v >> 16 == off[i].u >> 16) off[l].v = off[i].v;
            else {
                ++l;
                off[l].u = off[i].u;
                off[l].v = off[i].v;
            }
        }
        n_off = l + 1;
        // hand the reader an array of exactly the surviving chunks
        TPair64[] ret = new TPair64[n_off];
        for (i = 0; i < n_off; ++i) ret[i] = new TPair64(off[i].u, off[i].v);
        return new TabixLineReader(tid, beg, end, ret);
    }

    /**
     * Queries a region given as a string; see {@link #parseReg(String)} for the accepted forms.
     *
     * @param reg region string
     * @return a reader over the matching lines, or null when the sequence is unknown or the
     *         index holds nothing for the region
     */
    public TabixLineReader query(final String reg) {
        int[] x = parseReg(reg);
        if (x[0] < 0) {
            // unknown sequence: same answer as query(String, int, int), instead of indexing mIndex with -1
            return null;
        }
        return query(x[0], x[1], x[2]);
    }

    /**
     * Returns a reader over the data file: this object itself, whose readLine() reads
     * from the data stream's current position.
     */
    public LineReader iterate() {
        return this;
    }

    /**
     * Queries a region given by sequence name and coordinates.
     * NOTE: {@code start} is decremented by one, but never below 0, before the index is
     * consulted; {@code end} is passed through unchanged.
     *
     * @param chr   sequence name
     * @param start region start
     * @param end   region end
     * @return a reader over the matching lines, or null when the sequence is unknown or the
     *         index holds nothing for the region
     */
    public TabixLineReader query(final String chr, int start, int end) {
        final int tid = chr2tid(chr);
        if (tid < 0) {
            return null;
        }
        // Added by JTR to prevent index out of bounds exceptions
        final int adjustedStart = Math.max(0, start - 1);
        return query(tid, adjustedStart, end);
    }

    /**
     * Closes the data stream. Line readers obtained from query() read through the same
     * stream.
     *
     * @throws IOException if closing the stream fails
     */
    public void close() throws IOException {
        blockCompressedInputStream.close();
    }


    /**
     * Tells whether a data file has a tabix index next to it.
     * <p>
     * The path must end in "gz", and the file at the path plus ".tbi" must be readable as a
     * block-compressed stream whose content starts with the bytes 'T' 'B'. Paths starting
     * with "http:", "https:" or "ftp:" are opened as URLs.
     *
     * @param path path or URL of the data file
     * @return true when such an index exists; false otherwise, including when the index is
     *         missing or unreadable
     */
    public static boolean isTabix(String path) {
        if (!path.endsWith("gz")) {
            return false;
        }

        BlockCompressedInputStream is = null;
        try {
            if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) {
                is = new BlockCompressedInputStream(new URL(path + ".tbi"));
            } else {
                is = new BlockCompressedInputStream(new File(path + ".tbi"));
            }

            byte[] bytes = new byte[4];
            is.read(bytes);
            return bytes[0] == 'T' && bytes[1] == 'B';
        } catch (IOException e) {
            // A missing or unreadable index is the normal "not tabix" case for a probe;
            // it is answered with false rather than reported on stderr.
            return false;
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException ignored) {
                    // the answer is already determined; nothing useful to do here
                }
            }
        }
    }


    /**
     * Command-line entry point: prints whether the file is tabix-indexed, then either the
     * whole file (no region given) or the lines overlapping the region.
     *
     * @param args {@code <in.gz> [region]}
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("Usage: java -cp .:sam.jar TabixReader <in.gz> [region]");
            System.exit(1);
        }
        System.out.println(isTabix(args[0]));
        int status = 0;
        TabixReader tr = null;
        try {
            tr = new TabixReader(args[0]);

            String s;
            if (args.length == 1) { // no region is specified; print the whole file
                while ((s = tr.readLine()) != null)
                    System.out.println(s);
            } else { // a region is specified; random access
                TabixLineReader iter = tr.query(args[1]); // get the iterator
                if (iter != null) { // null means nothing is indexed for the region
                    while ((s = iter.readLine()) != null)
                        System.out.println(s);
                }
            }
        } catch (IOException e) {
            System.err.println("Error reading " + args[0] + ": " + e.getMessage());
            status = 1;
        } finally {
            if (tr != null) {
                try {
                    tr.close();
                } catch (IOException ignored) {
                    // exiting anyway
                }
            }
        }
        if (status != 0) {
            System.exit(status);
        }
    }
}