/*
 * Decompiled with CFR 0.152.
 */
package projects.gemoma;

import de.jstacs.DataType;
import de.jstacs.io.FileManager;
import de.jstacs.parameters.EnumParameter;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.Parameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.validation.FileExistsValidator;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.utils.IntList;
import de.jstacs.utils.SafeOutputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import projects.gemoma.GeMoMaModule;
import projects.gemoma.Tools;

public class Extractor
extends GeMoMaModule {
    /** Stored by the constructor and handed to {@code Tools.getSelection} when the "selected" parameter is set. */
    private int maxSize;
    /** Names of the outputs. Indices 0 and 1 are always written; indices 2..6 are looked up as boolean parameters in {@code run}. */
    static String[] name = new String[]{"cds-parts", "assignment", "proteins", "cds", "genomic", "introns", "identical"};
    /** Format label of each output, parallel to {@link #name}; filled by the static initializer. */
    static String[] type = new String[name.length];
    /** Counters of discarded transcripts, indexed by the non-negative problem code returned from {@code transcript}. */
    private int[] problem = new int[10];
    /** Number of transcripts that were accepted only after a repair attempt in {@code extract}. */
    private int repair = 0;
    /** Value of the "repair" parameter of the current run. */
    private boolean rep;
    /** Summary text assembled at the end of {@code run} and appended to the protocol. */
    StringBuffer shortInfo = new StringBuffer();
    /** Comma-separated IDs of the transcripts discarded in {@code extract}. */
    StringBuffer discarded = new StringBuffer();
    /** Intron length mapped to a one-element counter array; reported as a histogram in {@code run}. */
    private HashMap<Integer, int[]> intronL = new HashMap<Integer, int[]>();
    /** Total number of introns counted in {@link #intronL}; denominator of the cumulative fraction. */
    private long anz = 0L;
    /** Attribute prefix "Parent=" used while parsing the annotation; assigned in the static initializer. */
    private static String par;
    /** GTF attribute prefix for the gene ID; assigned in the static initializer. */
    private static String gID;
    /** GTF attribute prefix for the transcript ID; assigned in the static initializer. */
    private static String tID;
    /** Output streams of the current run, parallel to {@link #name}. */
    ArrayList<SafeOutputStream> out;
    /** Parts of the gene currently processed by {@code extract}, one per entry of {@code gene.exon}. */
    ArrayList<Part> part = new ArrayList<Part>();
    /** Scratch buffer in which {@code transcript} concatenates the DNA of the parts of one transcript. */
    StringBuffer dnaSeqBuff = new StringBuffer();
    /** Indices of the parts whose translation did not match the amino-acid pattern checked in {@code transcript}. */
    IntList message = new IntList();
    /** Donor statistics reported by {@code run}; NOTE(review): filled outside the visible part of this file. */
    HashMap<String, int[]> donor;
    /** Acceptor statistics reported by {@code run}; NOTE(review): filled outside the visible part of this file. */
    HashMap<String, int[]> acceptor;
    /** Counter per number of coding exons, reported by {@code run}; NOTE(review): filled outside the visible part of this file. */
    HashMap<Integer, int[]> count;
    /** Denominator used by {@code run} for the relative frequency of each acceptor entry. */
    int a = 0;
    /** Denominator used by {@code run} for the relative frequency of each donor entry. */
    int d = 0;
    /** Example table (amino acid and its codons) as plain text; not referenced in the visible part of this file. */
    static final String EXAMPLE = " Here is an example\n\n+---+------------------------------+\n| I | ATT, ATC, ATA                |\n+---+------------------------------+\n| L | CTT, CTC, CTA, CTG, TTA, TTG |\n+---+------------------------------+\n| V | GTT, GTC, GTA, GTG           |\n+---+------------------------------+\n|...| ...                          |\n+---+------------------------------+";

    // Fills the format table that parallels `name` and sets the attribute prefixes used by the parser.
    static {
        // FASTA is the default format; the tabular and GFF outputs are overridden afterwards.
        Arrays.fill(type, "fasta");
        type[1] = "tabular";
        type[5] = "gff";
        type[6] = "tabular";
        par = "Parent=";
        gID = "gene_id \"";
        tID = "transcript_id \"";
    }

    /**
     * Creates an extractor.
     *
     * @param maxSize stored in the instance and later passed to {@code Tools.getSelection}
     *                when the "selected" parameter is set in {@code run}
     */
    public Extractor(int maxSize) {
        this.maxSize = maxSize;
    }

    /**
     * Registers one output: appends the (possibly {@code null}) temp file to {@code file} and the
     * matching stream to {@code out}, so that both lists stay index-parallel.
     *
     * @param prefix name of the output, or {@code null} if this output is not requested
     * @param file   list receiving the temp file, or {@code null} when no file was created
     * @param out    list receiving the stream obtained from {@code SafeOutputStream.getSafeOutputStream}
     * @param temp   passed through to {@code Tools.createTempFile}
     * @throws IOException if the temp file cannot be created or opened
     */
    private static void getOut(String prefix, List<File> file, List<SafeOutputStream> out, String temp) throws IOException {
        File target = null;
        if (prefix != null) {
            target = Tools.createTempFile("Extractor-" + prefix, temp);
        }
        BufferedOutputStream sink = null;
        if (target != null) {
            sink = new BufferedOutputStream(new FileOutputStream(target));
        }
        file.add(target);
        out.add(SafeOutputStream.getSafeOutputStream(sink));
    }

    /**
     * Runs the extraction: reads the annotation, streams the genome FASTA contig by contig,
     * calls {@code extract} for every contig, and finally reports the result files and a textual
     * summary (discard reasons, exon/splice-site/intron statistics) via the protocol.
     *
     * <p>Parameters read by name: "selected", "upcase IDs", "annotation", "genetic code",
     * "Ambiguity", "discard pre-mature stop", "stop-codon excluded from CDS", "full-length",
     * "verbose", "long fasta comment", "repair", "genome", and one boolean parameter for each
     * entry of {@code name} from index 2 on.
     *
     * <p>All per-run statistics of this instance are reset first, so the summary of a reused
     * instance only covers the current call.
     *
     * @param parameters the tool parameters
     * @param protocol   receives progress messages, warnings and the summary
     * @param progress   switched to indeterminate mode
     * @param threads    not used by this method
     * @param temp       passed on to {@code Tools.createTempFile} for the output files
     * @return a result holding one {@code TextResult} per output file that was written
     * @throws Exception if reading the inputs, extracting, or writing the outputs fails
     */
    @Override
    public ToolResult run(ToolParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads, String temp) throws Exception {
        // Reset everything that is reported at the end; otherwise a second run on the same
        // instance would mix its numbers with those of the previous run.
        this.shortInfo.delete(0, this.shortInfo.length());
        this.discarded.delete(0, this.discarded.length());
        Arrays.fill(this.problem, 0);
        this.repair = 0;
        this.anz = 0L;
        this.a = 0;
        this.d = 0;
        if (this.intronL != null) {
            this.intronL.clear();
        }
        this.donor = new HashMap<String, int[]>();
        this.acceptor = new HashMap<String, int[]>();
        this.count = new HashMap<Integer, int[]>();
        progress.setIndeterminate();

        HashMap<String, String> selected = null;
        Parameter p = parameters.getParameterForName("selected");
        if (p.isSet()) {
            selected = Tools.getSelection(p.getValue().toString(), this.maxSize, protocol);
            protocol.append("selected: " + selected.size() + "\t" + selected + "\n");
        }
        HashMap<String, HashMap<String, Gene>> annot = Extractor.read((Boolean)parameters.getParameterForName("upcase IDs").getValue(), parameters.getParameterForName("annotation").getValue().toString(), selected, protocol);
        InputStream in = Tools.getInputStream(parameters.getParameterForName("genetic code"), "projects/gemoma/test_data/genetic_code.txt");
        HashMap<String, Character> code = Tools.getCode(in);
        Tools.Ambiguity ambi = (Tools.Ambiguity)((Object)parameters.getParameterForName("Ambiguity").getValue());
        boolean discardPreMatureStop = (Boolean)parameters.getParameterForName("discard pre-mature stop").getValue();
        boolean stopCodonEx = (Boolean)parameters.getParameterForName("stop-codon excluded from CDS").getValue();
        boolean fullLength = (Boolean)parameters.getParameterForName("full-length").getValue();
        boolean verbose = (Boolean)parameters.getParameterForName("verbose").getValue();
        boolean longComment = (Boolean)this.getParameter(parameters, "long fasta comment").getValue();
        this.rep = (Boolean)parameters.getParameterForName("repair").getValue();

        ArrayList<File> file = new ArrayList<File>();
        this.out = new ArrayList<SafeOutputStream>();
        int[] info = new int[3];
        // Contigs that carry annotation; every contig seen in the genome is removed again.
        HashSet<String> unUsedChr = new HashSet<String>(annot.keySet());
        boolean extracted = false;
        try {
            // Outputs 0 and 1 are always written, the others only if their parameter is true.
            Extractor.getOut(name[0], file, this.out, temp);
            Extractor.getOut(name[1], file, this.out, temp);
            for (int i = 2; i < name.length; i++) {
                p = parameters.getParameterForName(name[i]);
                boolean wanted = p != null && (Boolean)p.getValue();
                Extractor.getOut(wanted ? name[i] : null, file, this.out, temp);
            }
            this.out.get(1).writeln("#geneID\ttranscript\tcds-parts\tphases\tchr\tstrand\tstart\tend\tfull-length\tlongest intron\tsmallest exon\tsplit AA");
            this.out.get(6).writeln("#used transcript\tdiscarded transcript");

            // try-with-resources: the genome reader is closed even if extract(...) throws.
            try (BufferedReader r = Tools.openGzOrPlain(parameters.getParameterForName("genome").getValue().toString())) {
                StringBuffer seq = new StringBuffer();
                String comment = null;
                String line;
                while ((line = r.readLine()) != null) {
                    if (line.startsWith(">")) {
                        // A new header ends the previous contig: process what was collected so far.
                        this.extract(stopCodonEx, fullLength, ambi, protocol, verbose, comment, info, seq, annot, code, discardPreMatureStop, longComment);
                        unUsedChr.remove(comment);
                        int idx = line.indexOf(' ');
                        comment = line.substring(1, idx < 0 ? line.length() : idx);
                        seq.delete(0, seq.length());
                    } else {
                        seq.append(line.trim().toUpperCase());
                    }
                }
                // The last contig has no following header.
                this.extract(stopCodonEx, fullLength, ambi, protocol, verbose, comment, info, seq, annot, code, discardPreMatureStop, longComment);
                unUsedChr.remove(comment);
            }
            extracted = true;
        } finally {
            if (!extracted) {
                // Best-effort cleanup so that a failed run does not leave the temp files open.
                for (int k = 0; k < this.out.size(); k++) {
                    if (file.get(k) == null) {
                        continue;
                    }
                    try {
                        this.out.get(k).close();
                    } catch (Exception ignored) {
                        // The failure that brought us here is the one worth reporting.
                    }
                }
            }
        }

        ArrayList<TextResult> res = new ArrayList<TextResult>();
        for (int k = 0; k < file.size(); k++) {
            File current = file.get(k);
            if (current != null) {
                this.out.get(k).close();
                res.add(new TextResult(name[k], "Result", new FileParameter.FileRepresentation(current.getAbsolutePath()), type[k], this.getToolName(), null, true));
            }
        }

        this.shortInfo.append("\ngenes\t" + info[0] + "\n");
        this.shortInfo.append("identical CDS of same gene\t" + info[1] + "\n");
        this.shortInfo.append("transcripts\t" + info[2] + "\n\n");
        this.shortInfo.append("reasons for discarding transcripts:\n");
        this.shortInfo.append("ambiguous nucleotide\t" + this.problem[0] + "\n");
        this.shortInfo.append("start phase not zero\t" + this.problem[1] + "\n");
        this.shortInfo.append("missing start\t" + this.problem[2] + "\n");
        this.shortInfo.append("missing stop\t" + this.problem[3] + "\n");
        this.shortInfo.append("premature stop\t" + this.problem[4] + "\n");
        this.shortInfo.append("no DNA\t" + this.problem[5] + "\n");
        this.shortInfo.append("wrong phase\t" + this.problem[6] + "\n");
        this.shortInfo.append("conflicting phase\t" + this.problem[7] + "\n");
        this.shortInfo.append("not linear\t" + this.problem[8] + "\n\n");
        this.shortInfo.append("unexpected error\t" + this.problem[9] + "\n\n");
        this.shortInfo.append("repaired\t" + this.repair + "\n\n");
        if (this.discarded.length() > 0) {
            this.shortInfo.append("discarded transcript IDs: " + this.discarded + "\n\n");
        }
        if (unUsedChr.size() > 0) {
            this.shortInfo.append("WARNING: There are gene annotations on chromosomes/contigs with missing reference sequence: " + unUsedChr + "\n");
        }
        this.shortInfo.append("\n");
        protocol.append(this.shortInfo.toString());

        Integer[] exonNumbers = this.count.keySet().toArray(new Integer[this.count.size()]);
        if (exonNumbers.length > 0) {
            protocol.append("coding exons\t#\n");
            Arrays.sort(exonNumbers);
            for (Integer exons : exonNumbers) {
                protocol.append(exons + "\t" + this.count.get(exons)[0] + "\n");
            }
        }
        if (this.acceptor.size() > 0) {
            protocol.append("\nacceptor\t#\n");
            for (Map.Entry<String, int[]> e : this.acceptor.entrySet()) {
                protocol.append("acceptor\t" + e.getKey() + "\t" + e.getValue()[0] + "\t" + (double)e.getValue()[0] / (double)this.a + "\n");
            }
        }
        if (this.donor.size() > 0) {
            protocol.append("\ndonor\t#\n");
            for (Map.Entry<String, int[]> e : this.donor.entrySet()) {
                protocol.append("donor\t" + e.getKey() + "\t" + e.getValue()[0] + "\t" + (double)e.getValue()[0] / (double)this.d + "\n");
            }
        }
        if (this.intronL != null && this.intronL.size() > 0) {
            protocol.append("\nintron length\t#\tcumulative\n");
            Integer[] lengths = this.intronL.keySet().toArray(new Integer[this.intronL.size()]);
            Arrays.sort(lengths);
            double all = 0.0;
            for (Integer length : lengths) {
                int[] stat = this.intronL.get(length);
                all += (double)stat[0];
                protocol.append(length + "\t" + stat[0] + "\t" + all / (double)this.anz + "\n");
            }
        }
        return new ToolResult("", "", null, new ResultSet(res), parameters, this.getToolName(), new Date());
    }

    /**
     * Registers a transcript for a gene, creating the per-contig map and the gene on first use.
     * If the gene already exists with a different strand, the key {@code c + ";" + geneID} is
     * recorded in {@code errors}; the transcript is added to the gene in either case.
     *
     * @param errors       receives "contig;geneID" keys of genes with conflicting strand
     * @param trans        transcript ID to gene lookup, updated with this transcript
     * @param annot        contig to (gene ID to gene) map, extended as needed
     * @param evidence     passed to the {@code Gene} constructor
     * @param c            contig name
     * @param strand       strand text; only its first character is compared with '+'
     * @param geneID       ID of the gene
     * @param transcriptID ID of the transcript
     * @param attributes   passed to {@code Gene.add} together with the transcript ID; may be {@code null}
     */
    static void add(HashSet<String> errors, HashMap<String, Gene> trans, HashMap<String, HashMap<String, Gene>> annot, String evidence, String c, String strand, String geneID, String transcriptID, String attributes) {
        HashMap<String, Gene> genesOnContig = annot.get(c);
        if (genesOnContig == null) {
            genesOnContig = new HashMap<String, Gene>();
            annot.put(c, genesOnContig);
        }
        Gene known = genesOnContig.get(geneID);
        if (known == null) {
            known = new Gene(evidence, geneID, strand);
            genesOnContig.put(geneID, known);
        } else {
            int expected = strand.charAt(0) == '+' ? 1 : -1;
            if (known.strand != expected) {
                errors.add(c + ";" + geneID);
            }
        }
        known.add(transcriptID, attributes);
        trans.put(transcriptID, known);
    }

    /**
     * Reads a GFF or GTF annotation and groups its CDS entries by contig, gene and transcript.
     *
     * <p>The format is detected from the first data line: an '=' in column 9 means GFF, otherwise
     * GTF. In GFF mode reading stops at a "##FASTA" line. CDS lines are collected first and
     * attached to their transcripts afterwards, so transcript lines may follow their CDS lines.
     * Genes whose entries disagree on the strand are removed again.
     *
     * @param upcaseIDs whether transcript and parent IDs are converted to upper case
     * @param input     path of the annotation file, opened with {@code Tools.openGzOrPlain}
     * @param selected  if not {@code null}, only transcripts whose ID is a key of this map are used
     * @param protocol  receives status messages and warnings
     * @return map from contig name to (gene ID to gene)
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a data line does not have 9 tab-separated columns, or
     *                                  a GTF CDS line has no transcript_id attribute
     */
    public static HashMap<String, HashMap<String, Gene>> read(boolean upcaseIDs, String input, HashMap<String, String> selected, Protocol protocol) throws IOException {
        HashMap<String, HashMap<String, Gene>> annot = new HashMap<String, HashMap<String, Gene>>();
        HashMap<String, Gene> trans = new HashMap<String, Gene>();
        HashMap<String, ArrayList<String[]>> additional = new HashMap<String, ArrayList<String[]>>();
        ArrayList<String[]> cds = new ArrayList<String[]>();
        HashSet<String> errors = new HashSet<String>();
        boolean first = true;
        boolean gff = true;

        // try-with-resources: the reader is closed even when a malformed line aborts parsing.
        try (BufferedReader r = Tools.openGzOrPlain(input)) {
            String line;
            while ((line = r.readLine()) != null) {
                if (gff && line.equalsIgnoreCase("##FASTA")) {
                    protocol.append("Stop reading the annotation file because of '##FASTA'\n");
                    break;
                }
                if (line.length() == 0 || line.startsWith("#")) {
                    continue;
                }
                if (!gff) {
                    // GTF: cut a trailing comment
                    int hash = line.indexOf('#');
                    if (hash > 0) {
                        line = line.substring(0, hash);
                    }
                }
                String[] split = line.split("\t");
                if (split.length != 9) {
                    throw new IllegalArgumentException("This line does not seem to be a valid (tab-delimited) line of the annotation with 9 columns: " + line);
                }
                if (first) {
                    gff = split[8].indexOf('=') > 0;
                    protocol.append("detected annotation format: " + (gff ? "GFF" : "GTF") + "\n");
                    first = false;
                }
                switch (split[2]) {
                    case "CDS": {
                        if (gff) {
                            if (split[8].indexOf(par) < 0) {
                                if (split[8].indexOf("ID=") < 0) {
                                    // neither a parent nor an own ID: the line cannot be assigned
                                    continue;
                                }
                                // no parent given: the own ID takes the role of the parent
                                split[8] = split[8].replace("ID=", par);
                            }
                            cds.add(split);
                            break;
                        }
                        // GTF: rewrite transcript_id "X" to Parent=X so that the second pass
                        // below can treat both formats alike.
                        int tStart = split[8].indexOf(tID);
                        if (tStart < 0) {
                            throw new IllegalArgumentException("This CDS line of the annotation does not contain a transcript_id attribute: " + line);
                        }
                        tStart += tID.length();
                        String rawID = split[8].substring(tStart, split[8].indexOf('"', tStart));
                        String transcriptID = upcaseIDs ? rawID.toUpperCase() : rawID;
                        split[8] = split[8].replace(tID + rawID + "\"", par + transcriptID);
                        String geneID = "";
                        int gStart = split[8].indexOf(gID);
                        if (gStart >= 0) {
                            gStart += gID.length();
                            geneID = split[8].substring(gStart, split[8].indexOf('"', gStart));
                        }
                        if (geneID.length() == 0) {
                            // missing or empty gene_id: derive a gene ID from the transcript
                            geneID = transcriptID + ".gene";
                        }
                        Extractor.add(errors, trans, annot, split[1], split[0], split[6], geneID, transcriptID, null);
                        cds.add(split);
                        break;
                    }
                    case "transcript":
                    case "mRNA":
                    case "prediction": {
                        if (!gff) {
                            continue;
                        }
                        int idStart = split[8].indexOf("ID=") + 3;
                        int idEnd = split[8].indexOf(';', idStart);
                        String transcriptID = split[8].substring(idStart, idEnd > 0 ? idEnd : split[8].length());
                        if (upcaseIDs) {
                            transcriptID = transcriptID.toUpperCase();
                        }
                        if (selected != null && !selected.containsKey(transcriptID)) {
                            continue;
                        }
                        String geneID;
                        int parentStart = split[8].indexOf(par);
                        if (parentStart < 0) {
                            // transcript without parent: derive a gene ID from the transcript
                            geneID = transcriptID + ".gene";
                        } else {
                            parentStart += par.length();
                            int parentEnd = split[8].indexOf(';', parentStart);
                            geneID = split[8].substring(parentStart, parentEnd > 0 ? parentEnd : split[8].length());
                        }
                        if (geneID.indexOf(',') >= 0) {
                            protocol.appendWarning("Could not parse line (multiple parents): " + line + "\n");
                        }
                        Extractor.add(errors, trans, annot, split[1], split[0], split[6], geneID, transcriptID, split[8]);
                        break;
                    }
                    default: {
                        // any other GFF feature with a parent is kept as additional annotation
                        if (!gff) {
                            continue;
                        }
                        int parentStart = split[8].indexOf(par);
                        if (parentStart < 0) {
                            continue;
                        }
                        parentStart += par.length();
                        int parentEnd = split[8].indexOf(';', parentStart);
                        String parentID = split[8].substring(parentStart, parentEnd > 0 ? parentEnd : split[8].length());
                        if (upcaseIDs) {
                            parentID = parentID.toUpperCase();
                        }
                        ArrayList<String[]> l = additional.get(parentID);
                        if (l == null) {
                            l = new ArrayList<String[]>();
                            additional.put(parentID, l);
                        }
                        l.add(split);
                    }
                }
            }
        }
        protocol.append("number of detected CDS lines: " + cds.size() + "\n");

        // Second pass: attach every CDS line to each of its parent transcripts.
        HashSet<Gene> usedG = new HashSet<Gene>();
        HashSet<String> usedT = new HashSet<String>();
        for (String[] split : cds) {
            int start = split[8].indexOf(par) + par.length();
            int end = split[8].indexOf(';', start);
            String[] parent = split[8].substring(start, end > 0 ? end : split[8].length()).split(",");
            for (int j = 0; j < parent.length; j++) {
                if (upcaseIDs) {
                    parent[j] = parent[j].toUpperCase();
                }
                if (selected != null && !selected.containsKey(parent[j])) {
                    continue;
                }
                Gene gene = trans.get(parent[j]);
                if (gene == null) {
                    // CDS without a transcript line: create transcript and gene on the fly
                    Extractor.add(errors, trans, annot, split[1], split[0], split[6], parent[j] + ".gene", parent[j], null);
                    gene = trans.get(parent[j]);
                }
                usedG.add(gene);
                usedT.add(parent[j]);
                int s = split[6].charAt(0) == '+' ? 1 : -1;
                if (gene.strand != s) {
                    errors.add(split[0] + ";" + gene.id);
                }
                // a phase of "." is stored as the placeholder -100000
                gene.add(parent[j], new int[]{s, Integer.parseInt(split[3]), Integer.parseInt(split[4]), split[7].charAt(0) == '.' ? -100000 : Integer.parseInt(split[7])});
            }
        }
        if (additional.size() > 0) {
            for (Gene g : usedG) {
                for (String tid : g.transcript.keySet()) {
                    // Kept raw as in the original: the declared type of Transcript.add is not visible here.
                    ArrayList add = (ArrayList)additional.get(tid);
                    if (add == null) continue;
                    Transcript t = g.transcript.get(tid);
                    t.add = add;
                }
            }
        }
        if (errors.size() > 0) {
            protocol.append("number of genes with errors: " + errors.size() + "\n");
            for (String er : errors) {
                // Keys are "contig;geneID". Splitting at the first ';' keeps gene IDs intact that
                // are empty or contain ';' themselves.
                int sep = er.indexOf(';');
                HashMap<String, Gene> onContig = annot.get(er.substring(0, sep));
                Gene g = onContig == null ? null : onContig.remove(er.substring(sep + 1));
                if (g == null) {
                    // e.g. the gene was registered on another contig than the offending CDS line
                    protocol.appendWarning("Could not remove gene with errors (not found): " + er + "\n");
                    continue;
                }
                usedG.remove(g);
                usedT.removeAll(g.transcript.keySet());
            }
        }
        protocol.append("number of detected genes: " + usedG.size() + "\n");
        protocol.append("number of detected transcripts: " + usedT.size() + "\n");
        return annot;
    }

    /**
     * Tells whether two lists have the same length and the same value at every position.
     *
     * @param il1 first list
     * @param il2 second list
     * @return {@code true} if both lists hold the same values in the same order
     */
    private static boolean identical(IntList il1, IntList il2) {
        final int n = il1.length();
        boolean same = n == il2.length();
        for (int pos = 0; same && pos < n; pos++) {
            same = il1.get(pos) == il2.get(pos);
        }
        return same;
    }

    /**
     * Processes all annotated genes of one contig: cuts the exon sequences out of {@code seq},
     * checks every transcript via {@code transcript} (optionally retrying with forced start
     * phases when the "repair" option is on), updates the discard and intron-length statistics,
     * and writes the used CDS parts (output 0) and, if requested, the introns (output 5).
     *
     * <p>Returns without doing anything if {@code comment} is {@code null} or the contig has no
     * annotation.
     *
     * @param stopCodonEx          passed on to {@code transcript}
     * @param fullLength           passed on to {@code transcript}
     * @param ambi                 passed on to {@code transcript}
     * @param protocol             receives warnings
     * @param verbose              whether repairs are reported; also passed on to the first {@code transcript} call
     * @param comment              FASTA header of the contig; the part before the first space is the contig name
     * @param info                 counters passed on to {@code Gene.reduce} and {@code transcript}
     * @param seq                  sequence of the contig
     * @param annot                contig to (gene ID to gene) map as returned by {@code read}
     * @param code                 genetic code, passed on to {@code transcript}
     * @param discardPreMatureStop passed on to {@code transcript}
     * @param longComment          passed on to {@code transcript}
     * @throws Exception if writing an output or checking a transcript fails
     */
    private void extract(boolean stopCodonEx, boolean fullLength, Tools.Ambiguity ambi, Protocol protocol, boolean verbose, String comment, int[] info, StringBuffer seq, HashMap<String, HashMap<String, Gene>> annot, HashMap<String, Character> code, boolean discardPreMatureStop, boolean longComment) throws Exception {
        if (comment == null) {
            return;
        }
        int idx = comment.indexOf(' ');
        String chr = idx > 0 ? comment.substring(0, idx) : comment;
        HashMap<String, Gene> chrAnnot = annot.get(chr);
        if (chrAnnot == null) {
            return;
        }
        ArrayList<Gene> genes = new ArrayList<Gene>(chrAnnot.values());
        Collections.sort(genes);

        // Scratch arrays indexed by exon number, shared by all genes of the contig. They start
        // with 5000 entries and grow below if a gene has more exons than that.
        int capacity = 5000;
        boolean[] used = new boolean[capacity];
        String[] don = new String[capacity];
        String[] acc = new String[capacity];
        SafeOutputStream ident = this.out.get(6);
        SafeOutputStream intronOut = this.out.get(5);
        HashMap<String, int[]> introns = new HashMap<String, int[]>();

        for (Gene gene : genes) {
            if (gene.transcript.size() <= 0) continue;
            // splits[j][k] is evaluated below as "there is an intron between exon j and exon k"
            boolean[][] splits = new boolean[gene.exon.size()][gene.exon.size()];
            int strand = gene.strand;
            boolean forward = strand == 1;
            gene.reduce(gene.id, info, ident);
            this.part.clear();
            String[] id = gene.transcript.keySet().toArray(new String[gene.transcript.size()]);
            Arrays.sort(id);

            if (gene.exon.size() > capacity) {
                // All entries that are read for this gene are (re)written below, so the old
                // contents need not be copied.
                capacity = gene.exon.size();
                used = new boolean[capacity];
                don = new String[capacity];
                acc = new String[capacity];
            }

            // One Part per exon, together with the two bases flanking it on either side
            // (where the sequence is long enough).
            for (int i = 0; i < gene.exon.size(); i++) {
                int[] val = gene.exon.get(i);
                int off1 = val[1] - 1 >= 2 ? 2 : 0;
                int off2 = val[2] + 2 <= seq.length() ? 2 : 0;
                String s;
                try {
                    String p = seq.substring(val[1] - 1 - off1, val[2] + off2);
                    if (strand < 0) {
                        p = Tools.rc(p);
                    }
                    if (strand > 0) {
                        s = p.substring(off1, p.length() - off2);
                        acc[i] = off1 > 0 ? p.substring(0, off1) : "";
                        don[i] = off2 > 0 ? p.substring(p.length() - off2, p.length()) : "";
                    } else {
                        s = p.substring(off2, p.length() - off1);
                        acc[i] = off2 > 0 ? p.substring(0, off2) : "";
                        don[i] = off1 > 0 ? p.substring(p.length() - off1, p.length()) : "";
                    }
                }
                catch (StringIndexOutOfBoundsException sioobe) {
                    // The exon coordinates do not fit the sequence: the part gets no sequence.
                    s = null;
                }
                this.part.add(new Part(s, val));
            }

            Arrays.fill(used, false);
            for (int k = 0; k < id.length; k++) {
                IntList il = gene.transcript.get(id[k]).b;
                // Remember which parts were already translated before this transcript, so that
                // a failed attempt can be rolled back without touching them.
                boolean[] set = new boolean[il.length()];
                for (int j = 0; j < il.length(); j++) {
                    set[j] = this.part.get(il.get(j)).aa != null;
                }
                int prob = this.transcript(seq, stopCodonEx, chr, gene, id[k], -1, splits, fullLength, info, ambi, code, protocol, verbose, used, acc, don, discardPreMatureStop, longComment);
                if (prob >= 0 && this.rep) {
                    int test;
                    int phase = -1;
                    do {
                        ++phase;
                        // roll back the parts that the previous attempt translated
                        for (int j = 0; j < il.length(); j++) {
                            Part current = this.part.get(il.get(j));
                            if (!set[j]) {
                                current.offsetLeft = -100000;
                                current.aa = null;
                            }
                        }
                    } while ((test = this.transcript(seq, stopCodonEx, chr, gene, id[k], phase, splits, fullLength, info, ambi, code, protocol, false, used, acc, don, discardPreMatureStop, longComment)) >= 0 && phase <= 2);
                    if (test < 0) {
                        if (verbose) {
                            protocol.appendWarning(id[k] + "\trepaired with start phase " + phase + "\n");
                        }
                        ++this.repair;
                        prob = -1;
                    } else {
                        // repair failed: roll back the last attempt as well
                        for (int j = 0; j < il.length(); j++) {
                            Part current = this.part.get(il.get(j));
                            if (!set[j]) {
                                current.offsetLeft = -100000;
                                current.aa = null;
                            }
                        }
                    }
                }
                if (prob >= 0) {
                    this.problem[prob] = this.problem[prob] + 1;
                    this.discarded.append((this.discarded.length() > 0 ? ", " : "") + id[k]);
                } else {
                    // accepted transcript: update the intron length histogram
                    int last = -1;
                    for (int j = 0; j < il.length(); j++) {
                        Part current = this.part.get(il.get(j));
                        if (last != -1) {
                            int inLe = forward ? current.start - last : last - current.end;
                            int[] num = this.intronL.get(inLe);
                            if (num == null) {
                                num = new int[1];
                                this.intronL.put(inLe, num);
                            }
                            num[0] = num[0] + 1;
                            ++this.anz;
                        }
                        last = forward ? current.end : current.start;
                    }
                }
            }

            for (int j = 0; j < gene.exon.size(); j++) {
                if (!used[j]) {
                    continue;
                }
                Part p = this.part.get(j);
                if (p.aa.length() > 0) {
                    this.out.get(0).write(">" + gene.id + "_" + j + "\n" + p.aa + "\n");
                }
                if (intronOut.doesNothing()) {
                    continue;
                }
                for (int k = 0; k < splits.length; k++) {
                    if (!splits[j][k]) {
                        continue;
                    }
                    int st;
                    int en;
                    if (forward) {
                        st = gene.exon.get(j)[2] + 1;
                        en = gene.exon.get(k)[1];
                    } else {
                        st = gene.exon.get(k)[2] + 1;
                        en = gene.exon.get(j)[1];
                    }
                    // The empty score column ("\t\t") is filled with the count when writing.
                    String key = chr + "\tannotation\tintron\t" + st + "\t" + en + "\t\t" + (forward ? "+" : "-") + "\t.\t.";
                    int[] counts = introns.get(key);
                    if (counts == null) {
                        counts = new int[1];
                        introns.put(key, counts);
                    }
                    counts[0] = counts[0] + 1;
                }
            }
        }

        if (!intronOut.doesNothing()) {
            String[] keys = introns.keySet().toArray(new String[0]);
            Arrays.sort(keys);
            for (String key : keys) {
                int[] count = introns.get(key);
                intronOut.writeln(key.replaceFirst("\t\t", "\t" + count[0] + "\t"));
            }
        }
    }

    int transcript(StringBuffer seq, boolean stopCodonEx, String chr, Gene gene, String trans, int s, boolean[][] splits, boolean fullLength, int[] info, Tools.Ambiguity ambi, HashMap<String, Character> code, Protocol protocol, boolean verbose, boolean[] used, String[] acc, String[] don, boolean discardPreMatureStop, boolean longComment) throws IOException {
        int startPhase;
        this.dnaSeqBuff.delete(0, this.dnaSeqBuff.length());
        int currentProb = -1;
        Transcript tr = gene.transcript.get(trans);
        IntList il = tr.b;
        if (il.length() == 0) {
            protocol.append("No coding exon(s) for: " + trans + "\n");
            return -1;
        }
        int start = gene.strand > 0 ? gene.exon.get(il.get(0))[1] : gene.exon.get(il.get(il.length() - 1))[1];
        int end = gene.strand > 0 ? gene.exon.get(il.get(il.length() - 1))[2] : gene.exon.get(il.get(0))[2];
        int n = startPhase = s >= 0 && s < 3 ? s : this.part.get((int)il.get((int)0)).offsetLeft;
        if (startPhase == -100000) {
            startPhase = 0;
        }
        int offset = 3 - startPhase;
        int pa = -1;
        this.message.clear();
        Part current = null;
        int minExon = Integer.MAX_VALUE;
        int maxIntron = -1;
        int lastPos = -1;
        int j = 0;
        while (j < il.length()) {
            int l;
            pa = il.get(j);
            current = this.part.get(pa);
            if (lastPos >= 0 && (gene.strand > 0 && current.start <= lastPos || gene.strand < 0 && current.end >= lastPos)) {
                if (verbose) {
                    int oldPa = il.get(j - 1);
                    Part old = this.part.get(oldPa);
                    protocol.appendWarning(String.valueOf(trans) + "\tnot linear: " + il.toString() + " strand=" + gene.strand + " " + oldPa + ":" + old.start + ".." + old.end + " " + pa + ":" + current.start + ".." + current.end + "\n");
                }
                return 8;
            }
            lastPos = gene.strand > 0 ? current.end : current.start;
            if (current.dna == null) {
                currentProb = 1;
                break;
            }
            this.dnaSeqBuff.append(current.dna);
            if (current.offsetLeft == -100000) {
                current.offsetLeft = (3 - offset) % 3;
            }
            if (current.aa == null) {
                try {
                    current.aa = Tools.translate(current.offsetLeft, current.dna, code, false, ambi);
                }
                catch (IllegalArgumentException iae) {
                    current.aa = null;
                    currentProb = 0;
                    break;
                }
                current.offsetRight = current.dna.length() - current.offsetLeft - 3 * current.aa.length();
            } else if ((fullLength || j > 0) && current.offsetLeft != (3 - offset) % 3) {
                currentProb = 2;
                break;
            }
            if (current.aa != null && current.aa.length() > 0 && !current.aa.matches("[A-Za-z" + (discardPreMatureStop ? "" : "\\*") + "]*" + (j + 1 == il.length() && !stopCodonEx ? "\\*" + (fullLength ? "{1}" : "{0,1}") : ""))) {
                this.message.add(pa);
            }
            if (current.aa != null && current.aa.length() < minExon) {
                minExon = current.aa.length();
            }
            if (j > 0 && (l = gene.strand == 1 ? gene.exon.get(il.get(j))[1] - gene.exon.get(il.get(j - 1))[2] - 1 : gene.exon.get(il.get(j - 1))[1] - gene.exon.get(il.get(j))[2] - 1) > maxIntron) {
                maxIntron = l;
            }
            offset = current.offsetRight;
            ++j;
        }
        if (stopCodonEx && current != null) {
            if (current.aa == null || current.aa.length() == 0) {
                current.aa = "*";
            } else if (current.aa.charAt(current.aa.length() - 1) != '*') {
                current.aa = String.valueOf(current.aa) + "*";
            }
        }
        String p = null;
        if (j == il.length()) {
            if (ambi == Tools.Ambiguity.EXCEPTION && !this.dnaSeqBuff.toString().matches("[ACGT]*")) {
                j = il.length() + 1;
                currentProb = 0;
            } else {
                try {
                    p = Tools.translate(startPhase, this.dnaSeqBuff.toString(), code, false, ambi);
                    if (stopCodonEx) {
                        p = String.valueOf(p) + "*";
                    }
                }
                catch (IllegalArgumentException iae) {
                    j = il.length() + 1;
                    currentProb = 0;
                }
            }
        }
        if (j == il.length()) {
            boolean preMature;
            if (p.length() == 0) {
                return 9;
            }
            int idx = p.indexOf(42);
            boolean bl = preMature = idx >= 0 && idx < p.length() - 1;
            if (fullLength && startPhase != 0) {
                if (verbose) {
                    protocol.appendWarning(String.valueOf(trans) + "\tskip start phase not zero\n");
                }
                return 1;
            }
            if (fullLength && p.charAt(0) != 'M') {
                if (verbose) {
                    this.writeWarning(protocol, trans, il.length(), "skip missing start", p, this.dnaSeqBuff);
                }
                return 2;
            }
            if (fullLength && p.charAt(p.length() - 1) != '*') {
                if (verbose) {
                    this.writeWarning(protocol, trans, il.length(), "skip missing stop", p, this.dnaSeqBuff);
                }
                return 3;
            }
            if (preMature && discardPreMatureStop) {
                if (verbose) {
                    this.writeWarning(protocol, trans, il.length(), "skip premature stop", p, this.dnaSeqBuff);
                }
                return 4;
            }
            if (this.message.length() > 0) {
                if (verbose) {
                    String c = "";
                    j = 0;
                    while (j < il.length()) {
                        pa = il.get(j);
                        current = this.part.get(pa);
                        c = String.valueOf(c) + "cds-parts: " + pa + " (phase: " + current.offsetLeft + ")\nDNA: " + current.dna + "\nAA: " + current.aa + "\n";
                        ++j;
                    }
                    protocol.appendWarning(String.valueOf(trans) + "\tskip wrong phase for coding part(s) = " + this.message + "\n" + "\nCDS: " + this.dnaSeqBuff.toString() + "\nprotein: " + p + "\n\nparts:\n" + c);
                }
                return 6;
            }
            info[2] = info[2] + 1;
            String comment = trans;
            if (longComment) {
                comment = String.valueOf(comment) + " gene=" + gene.id + " chr=" + chr + " strand=" + gene.strand + " interval=" + start + ".." + end;
                if (tr.attributes != null) {
                    String a = this.deleteAttribute("Parent=", tr.attributes);
                    a = this.deleteAttribute("ID=", a);
                    a = a.replaceAll(";", " ");
                    comment = String.valueOf(comment) + " " + a;
                }
            }
            this.out.get(3).write(">" + comment + "\n" + this.dnaSeqBuff.toString() + "\n");
            this.out.get(2).write(">" + comment + "\n" + p + "\n");
            String x = il.toString();
            SafeOutputStream sos = this.out.get(1);
            sos.write(String.valueOf(gene.id) + "\t" + trans + "\t" + x.substring(1, x.length() - 1).replaceAll(" ", ""));
            String splitAA = "";
            int currentPos = 0;
            j = 0;
            while (j < il.length()) {
                current = this.part.get(il.get(j));
                sos.write(String.valueOf(j == 0 ? "\t" : ",") + current.offsetLeft);
                int pos = currentPos / 3;
                int oldPos = currentPos;
                currentPos += current.dna.length();
                if (j > 1) {
                    splitAA = String.valueOf(splitAA) + ",";
                }
                if (oldPos % 3 > 0 && pos < currentPos / 3) {
                    splitAA = String.valueOf(splitAA) + p.substring(pos, pos + 1);
                }
                ++j;
            }
            sos.write("\t" + chr + "\t" + gene.strand + "\t" + start + "\t" + end + "\t" + (p.charAt(0) == 'M' && p.charAt(p.length() - 1) == '*') + "\t" + (maxIntron > 0 ? Integer.valueOf(maxIntron) : "NA") + "\t" + minExon + "\t" + splitAA + "\n");
            j = 0;
            while (j < il.length()) {
                used[il.get((int)j)] = true;
                ++j;
            }
            int[] c = this.count.get(il.length());
            if (c == null) {
                c = new int[1];
                this.count.put(il.length(), c);
            }
            c[0] = c[0] + 1;
            if (!this.out.get(4).doesNothing()) {
                int st;
                boolean forward;
                StringBuffer genomicRegion = new StringBuffer();
                int off = 300;
                Part firstP = this.part.get(il.get(0));
                Part lastP = this.part.get(il.get(il.length() - 1));
                boolean bl2 = forward = gene.strand == 1;
                if (forward) {
                    st = Math.max(firstP.start - off, 1);
                    genomicRegion = new StringBuffer(seq.substring(st - 1, Math.min(seq.length(), lastP.end + off)).toLowerCase());
                } else {
                    st = Math.min(seq.length(), firstP.end + off);
                    genomicRegion = new StringBuffer(Tools.rc(seq.substring(Math.max(lastP.start - off, 1) - 1, st)).toLowerCase());
                }
                j = 0;
                while (j < il.length()) {
                    Part t = this.part.get(il.get(j));
                    int a = t.start - st;
                    int b = t.end - st;
                    if (forward) {
                        genomicRegion.replace(a, b + 1, genomicRegion.substring(a, b + 1).toUpperCase());
                    } else {
                        genomicRegion.replace(-b, -a + 1, genomicRegion.substring(-b, -a + 1).toUpperCase());
                    }
                    ++j;
                }
                this.out.get(4).writeln(">" + comment);
                this.out.get(4).writeln(genomicRegion);
            }
            int ignoreAAForSpliceSite = 30;
            int last = -1;
            j = 0;
            while (j < il.length()) {
                int add;
                int targetEnd;
                int targetStart;
                pa = il.get(j);
                current = this.part.get(pa);
                if (last != -1) {
                    splits[last][pa] = true;
                }
                last = pa;
                int[] exon = gene.exon.get(pa);
                if (gene.strand == 1) {
                    targetStart = exon[1] + current.offsetLeft;
                    targetEnd = exon[2] - current.offsetRight;
                } else {
                    targetStart = exon[1] + current.offsetRight;
                    targetEnd = exon[2] - current.offsetLeft;
                }
                int l = Math.abs(targetStart - 1 - targetEnd);
                int t = 3 * ignoreAAForSpliceSite;
                if (l / 3 < t) {
                    add = l / 3;
                    add -= add % 3;
                } else {
                    add = t;
                }
                if (current.dna.length() > 0) {
                    if (j > 0 && acc[pa].length() > 0) {
                        int[] stat = this.acceptor.get(acc[pa]);
                        if (stat == null) {
                            stat = new int[1];
                            this.acceptor.put(acc[pa], stat);
                        }
                        stat[0] = stat[0] + 1;
                        ++this.a;
                    }
                    if (j + 1 < il.length() && don[pa].length() > 0) {
                        int[] stat = this.donor.get(don[pa]);
                        if (stat == null) {
                            stat = new int[1];
                            this.donor.put(don[pa], stat);
                        }
                        stat[0] = stat[0] + 1;
                        ++this.d;
                    }
                }
                ++j;
            }
            return -1;
        }
        switch (currentProb) {
            case 0: {
                if (verbose) {
                    if (j < il.length()) {
                        protocol.appendWarning(String.valueOf(trans) + "\tskip non-ACGT coding part " + j + "\n");
                    } else {
                        protocol.appendWarning(String.valueOf(trans) + "\tskip non-ACGT coding protein\n");
                    }
                }
                return 0;
            }
            case 1: {
                if (verbose) {
                    protocol.appendWarning(String.valueOf(trans) + "\tskip no DNA for coding part " + j + "\n");
                }
                return 5;
            }
            case 2: {
                if (verbose) {
                    protocol.appendWarning(String.valueOf(trans) + "\tskip conflicting phase for coding part " + j + "\n");
                }
                return 7;
            }
        }
        return 8;
    }

    /**
     * Removes the first {@code key=value} entry whose key starts with
     * {@code attribute} from a semicolon-separated attribute string.
     *
     * <p>A hit only counts if it begins an entry, i.e. it is located at the
     * start of the string or directly after a {@code ';'} or a whitespace
     * character. This prevents a short key such as {@code "ID="} from matching
     * inside a longer one such as {@code "geneID="}, which would otherwise glue
     * the remainder of the longer key onto the following entry.
     *
     * <p>The entry is removed together with its terminating {@code ';'}. If the
     * entry is the last one, everything from its start to the end of the string
     * is dropped (a preceding {@code ';'} is kept).
     *
     * @param attribute the key including the trailing {@code '='}, e.g. {@code "Parent="}
     * @param all the complete attribute string
     * @return {@code all} without the first matching entry, or {@code all}
     *         unchanged if there is no such entry or one of the arguments is {@code null}
     */
    String deleteAttribute(String attribute, String all) {
        if (all == null || attribute == null) {
            return all;
        }
        int startIndex = all.indexOf(attribute);
        // skip hits that are only the tail of another key
        while (startIndex > 0) {
            char before = all.charAt(startIndex - 1);
            if (before == ';' || Character.isWhitespace(before)) {
                break;
            }
            startIndex = all.indexOf(attribute, startIndex + 1);
        }
        if (startIndex < 0) {
            return all;
        }
        int endIndex = all.indexOf(';', startIndex);
        if (endIndex < 0) {
            return all.substring(0, startIndex);
        }
        return all.substring(0, startIndex) + all.substring(endIndex + 1);
    }

    /**
     * Appends one warning to the protocol. The warning consists of a header
     * line ({@code trans}, the number of exons and {@code reason}, separated by
     * tabs) followed by the protein sequence and the DNA sequence, each on its
     * own line.
     *
     * @param protocol the protocol receiving the warning
     * @param trans the transcript identifier written at the start of the header line
     * @param numExons the value reported as {@code numExons=}
     * @param reason free text describing the warning
     * @param p the protein sequence written on the second line
     * @param dna the DNA sequence written on the third line
     */
    void writeWarning(Protocol protocol, String trans, int numExons, String reason, String p, CharSequence dna) {
        StringBuilder warning = new StringBuilder();
        warning.append(trans).append("\tnumExons=").append(numExons);
        warning.append("\t").append(reason).append("\n");
        warning.append(p).append("\n");
        warning.append(dna).append("\n");
        protocol.appendWarning(warning.toString());
    }

    @Override
    public ToolParameterSet getToolParameters() {
        try {
            return new ToolParameterSet(this.getShortName(), new FileParameter("annotation", "Reference annotation file (GFF or GTF), which contains gene models annotated in the reference genome", "gff,gff3,gtf,gff.gz,gff3.gz,gtf.gz", true, new FileExistsValidator(), true), new FileParameter("genome", "Reference genome file (FASTA)", "fasta,fa,fas,fna,fasta.gz,fa.gz,fas.gz,fna.gz", true, new FileExistsValidator(), true), new FileParameter("genetic code", "optional user-specified genetic code", "tabular", false), new SimpleParameter(DataType.BOOLEAN, name[2], "whether the complete proteins sequences should returned as output", true, false), new SimpleParameter(DataType.BOOLEAN, name[3], "whether the complete CDSs should returned as output", true, false), new SimpleParameter(DataType.BOOLEAN, name[4], "whether the genomic regions should be returned (upper case = coding, lower case = non coding)", true, false), new SimpleParameter(DataType.BOOLEAN, name[5], "whether introns should be extracted from annotation, that might be used for test cases", true, false), new SimpleParameter(DataType.BOOLEAN, name[6], "if CDS is identical Extractor only used one transcript. This parameter allows to return a table that lists in the first column the used transcript and in the second column the discarded transcript. If no transcript is discarded, the list is empty.", true, false), new SimpleParameter(DataType.BOOLEAN, "upcase IDs", "whether the IDs in the GFF should be upcased", true, false), new SimpleParameter(DataType.BOOLEAN, "repair", "if a transcript annotation can not be parsed, the program will try to infer the phase of the CDS parts to repair the annotation", true, false), new FileParameter("selected", "The path to list file, which allows to make only a predictions for the contained transcript ids. The first column should contain transcript IDs as given in the annotation. 
Remaining columns will be ignored.", "tabular,txt", this.maxSize > -1, new FileExistsValidator()), new EnumParameter(Tools.Ambiguity.class, "This parameter defines how to deal with ambiguities in the DNA. There are 3 options: EXCEPTION, which will remove the corresponding transcript, AMBIGUOUS, which will use an X for the corresponding amino acid, and RANDOM, which will randomly select an amnio acid from the list of possibilities.", true, Tools.Ambiguity.EXCEPTION.toString()), new SimpleParameter(DataType.BOOLEAN, "discard pre-mature stop", "if *true* transcripts with pre-mature stop codon are discarded as they often indicate misannotation", true, true), new SimpleParameter(DataType.BOOLEAN, "stop-codon excluded from CDS", "A flag that states whether the reference annotation contains the stop codon in the CDS annotation or not", true, false), new SimpleParameter(DataType.BOOLEAN, "full-length", "A flag which allows for choosing between only full-length and all (i.e., full-length and partial) transcripts", true, true), new SimpleParameter(DataType.BOOLEAN, "long fasta comment", "whether a short (transcript ID) or a long (transcript ID, gene ID, chromosome, strand, interval) fasta comment should be written for proteins, CDSs, and genomic regions", true, false), new SimpleParameter(DataType.BOOLEAN, "verbose", "A flag which allows to output a wealth of additional information", true, false));
        }
        catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException();
        }
    }

    /**
     * Returns the name of this tool.
     *
     * @return the constant {@code "Extractor"}
     */
    @Override
    public String getToolName() {
        final String toolName = "Extractor";
        return toolName;
    }

    /**
     * Returns the short name of this tool, which is the same as the tool name.
     *
     * @return the result of {@link #getToolName()}
     */
    @Override
    public String getShortName() {
        final String shortName = getToolName();
        return shortName;
    }

    /**
     * Returns a one-line description of this tool.
     *
     * @return the description text
     */
    @Override
    public String getDescription() {
        final String description = "extracts parts of CDSs as annotated in a genome (assembly)";
        return description;
    }

    /**
     * Returns the help text of this tool: a description of its purpose and its
     * inputs, followed by the text of {@code MORE}.
     *
     * @return the help text
     */
    @Override
    public String getHelpText() {
        String help = "This tool can be used to create input files for **GeMoMa**, i.e., it creates at least a fasta file containing the translated parts of the CDS and a tabular file containing the assignment of transcripts to genes and parts of CDS to transcripts. In addition, **Extractor** can be used to create several additional files from the final prediction, e.g. proteins, CDSs, ... . Two inputs are mandatory: The genome as fasta or fasta.gz and the corresponding annotation as gff or gff.gz. The gff file should be sorted. If you like to set a user-specific genetic code, please use a tab-delimited file with two columns. The first column contains the amino acid in one letter code, the second a list of tripletts.";
        return help + MORE;
    }

    /**
     * Describes the results that are reported by default: one text result for
     * each of the first two entries of {@code name} (with the corresponding
     * entry of {@code type}).
     *
     * @return an array with two result entries
     */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        final int numberOfDefaults = 2;
        JstacsTool.ResultEntry[] defaults = new JstacsTool.ResultEntry[numberOfDefaults];
        for (int idx = 0; idx < defaults.length; idx++) {
            defaults[idx] = new JstacsTool.ResultEntry(TextResult.class, type[idx], name[idx]);
        }
        return defaults;
    }

    /**
     * Loads the test case of this tool from
     * {@code tests/gemoma/xml/extractor-test.xml} below the given directory.
     *
     * @param path the directory containing the {@code tests} folder
     * @return an array with the single test case, or {@code null} if the file
     *         could not be read or parsed (the stack trace is printed in that case)
     */
    @Override
    public ToolResult[] getTestCases(String path) {
        String location = path + File.separator + "tests/gemoma/xml/extractor-test.xml";
        try {
            ToolResult reference = new ToolResult(FileManager.readFile(location));
            return new ToolResult[]{reference};
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * A gene with its transcripts and the pool of distinct coding parts
     * ("exons") that the transcripts refer to by index.
     *
     * <p>Each entry of {@link #exon} is an {@code int[]}; within this class
     * index 1 is used as start and index 2 as end of the part, and the product
     * of index 0 and index 1 is used as sort key (index 0 presumably holds the
     * strand sign - confirm against the code that creates the arrays).
     *
     * <p>Genes are ordered by the midpoint of their interval
     * {@code [start, end]}.
     */
    public static class Gene
    implements Comparable<Gene> {
        /** The transcripts of this gene by transcript identifier. */
        HashMap<String, Transcript> transcript;
        ArrayList<String> attributes;
        /** The distinct parts of this gene; transcripts store indices into this list. */
        ArrayList<int[]> exon;
        /** Smallest start of the gene; {@code -1} until {@link #precompute()} has been run. */
        int start;
        /** Largest end of the gene; {@code -1} until {@link #precompute()} has been run. */
        int end;
        /** {@code 1} if the strand string starts with {@code '+'}, otherwise {@code -1}. */
        int strand;
        String id;
        String evidence;

        @Override
        public String toString() {
            return this.id + ": " + this.transcript.size() + " transcripts";
        }

        /**
         * Creates a gene without transcripts and parts.
         *
         * @param evidence the evidence stored for this gene
         * @param id the identifier of the gene
         * @param strand the strand; only the first character is inspected
         */
        Gene(String evidence, String id, String strand) {
            this.evidence = evidence;
            this.id = id;
            this.strand = strand.charAt(0) == '+' ? 1 : -1;
            this.transcript = new HashMap<>();
            this.exon = new ArrayList<>();
            this.attributes = new ArrayList<>();
            this.start = -1;
            this.end = -1;
        }

        /**
         * Registers a transcript unless a transcript with this identifier is
         * already known.
         *
         * @param t the transcript identifier
         * @param attributes the attributes of the transcript
         */
        void add(String t, String attributes) {
            if (this.transcript.get(t) == null) {
                this.transcript.put(t, new Transcript(attributes));
            }
        }

        /**
         * Adds a part to a transcript. Parts with identical values are stored
         * only once in {@link #exon}; the transcript receives the index of the
         * (existing or newly added) part.
         *
         * @param t the identifier of a transcript that has been registered before
         * @param border the values describing the part
         * @throws NullPointerException if no transcript with identifier {@code t} is known
         */
        void add(String t, int[] border) {
            // fetch the transcript first, so that an unknown transcript fails before the pool is modified
            IntList parts = this.transcript.get(t).b;
            int index = 0;
            while (index < this.exon.size() && !Gene.sameBorder(this.exon.get(index), border)) {
                ++index;
            }
            if (index == this.exon.size()) {
                this.exon.add(border);
            }
            parts.add(index);
        }

        /**
         * Checks whether all values of a known part agree with the values at
         * the same positions of {@code border}.
         */
        private static boolean sameBorder(int[] known, int[] border) {
            for (int k = 0; k < known.length; ++k) {
                if (known[k] != border[k]) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Removes transcripts that are identical to a transcript with a
         * lexicographically smaller identifier (as decided by
         * {@code Extractor.identical} on the part lists), and sorts the parts of
         * the remaining transcripts.
         *
         * <p>For every removed transcript one line
         * {@code kept<TAB>removed} is written to {@code out}.
         *
         * @param geneName not used
         * @param info counters; {@code info[0]} is incremented once per call,
         *        {@code info[1]} once per removed transcript
         * @param out receives the list of removed transcripts
         * @throws IOException if writing to {@code out} fails
         */
        void reduce(String geneName, int[] info, SafeOutputStream out) throws IOException {
            info[0]++;
            String[] names = this.transcript.keySet().toArray(new String[0]);
            Arrays.sort(names);
            boolean[] kept = new boolean[names.length];
            Arrays.fill(kept, true);
            for (int i = 0; i < names.length; ++i) {
                Transcript reference = this.transcript.get(names[i]);
                if (reference == null || !kept[i]) {
                    // already removed as duplicate of an earlier transcript
                    continue;
                }
                IntList referenceParts = reference.b;
                for (int j = i + 1; j < names.length; ++j) {
                    if (kept[j] && Extractor.identical(this.transcript.get(names[j]).b, referenceParts)) {
                        kept[j] = false;
                        out.writeln(names[i] + "\t" + names[j]);
                        this.transcript.remove(names[j]);
                        info[1]++;
                    }
                }
            }
            this.sortExons();
        }

        /**
         * Sorts the part indices of every transcript by the product of the
         * values at index 0 and index 1 of the referenced parts (ascending,
         * stable insertion sort). The list of a transcript is only rewritten if
         * its order actually changed.
         */
        void sortExons() {
            for (Transcript t : this.transcript.values()) {
                IntList il = t.b;
                int[] ids = il.toArray();
                // tracked per transcript, so that untouched lists are not rebuilt
                boolean swapped = false;
                for (int i = 1; i < ids.length; ++i) {
                    int[] current = this.exon.get(ids[i]);
                    for (int j = i - 1; j >= 0; --j) {
                        int[] compare = this.exon.get(ids[j]);
                        if (current[0] * current[1] >= compare[0] * compare[1]) {
                            break;
                        }
                        int help = ids[j];
                        ids[j] = ids[j + 1];
                        ids[j + 1] = help;
                        swapped = true;
                    }
                }
                if (!swapped) {
                    continue;
                }
                il.clear();
                for (int id : ids) {
                    il.add(id);
                }
            }
        }

        /**
         * Determines {@link #start} and {@link #end} as minimum and maximum over
         * all parts (index 1 and 2) and over the additional entries of all
         * transcripts (fields 3 and 4, parsed as integers).
         */
        void precompute() {
            this.start = Integer.MAX_VALUE;
            this.end = Integer.MIN_VALUE;
            for (int[] current : this.exon) {
                if (this.start > current[1]) {
                    this.start = current[1];
                }
                if (this.end < current[2]) {
                    this.end = current[2];
                }
            }
            for (Transcript t : this.transcript.values()) {
                if (t.add == null) {
                    continue;
                }
                for (String[] s : t.add) {
                    this.start = Math.min(this.start, Integer.parseInt(s[3]));
                    this.end = Math.max(this.end, Integer.parseInt(s[4]));
                }
            }
        }

        /**
         * Compares two genes by the midpoint of their intervals. The interval of
         * a gene is computed on demand while its start still is {@code -1}.
         */
        @Override
        public int compareTo(Gene o) {
            if (this.start == -1) {
                this.precompute();
            }
            if (o.start == -1) {
                o.precompute();
            }
            int mid = this.start + (this.end - this.start) / 2;
            int otherMid = o.start + (o.end - o.start) / 2;
            return Integer.compare(mid, otherMid);
        }
    }

    /**
     * One coding part: its DNA, its (lazily set) amino acid sequence, its
     * genomic interval and the number of bases at the left and right border
     * that do not belong to a complete codon of this part.
     */
    static class Part {
        /** Marker for an offset that has not been determined yet. */
        static final int NO_PHASE = -100000;
        String dna;
        String aa;
        int offsetLeft;
        int offsetRight;
        int start;
        int end;

        /**
         * Creates a part without amino acid sequence and without right offset.
         *
         * @param dna the DNA of the part
         * @param val the description of the part: index 1 is the start, index 2
         *        the end and index 3 the left offset
         */
        Part(String dna, int[] val) {
            this.start = val[1];
            this.end = val[2];
            this.offsetLeft = val[3];
            this.offsetRight = NO_PHASE;
            this.dna = dna;
            this.aa = null;
        }
    }

    /**
     * A transcript: its attribute string, the list of part indices and an
     * optional list of additional entries.
     */
    public static class Transcript {
        /** The indices of the parts of this transcript. */
        IntList b;
        /** The attributes as passed to the constructor. */
        String attributes;
        /** Additional entries; {@code null} until a list is assigned from outside. */
        ArrayList<String[]> add;

        /**
         * Creates a transcript with an empty part list and no additional entries.
         *
         * @param attributes the attributes of the transcript
         */
        public Transcript(String attributes) {
            this.b = new IntList();
            this.add = null;
            this.attributes = attributes;
        }
    }
}

