/*
 * Decompiled with CFR 0.152.
 */
package projects.gemoma;

import de.jstacs.DataType;
import de.jstacs.parameters.ExpandableParameterSet;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.ParameterSetContainer;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.parameters.validation.FileExistsValidator;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import projects.gemoma.GeMoMaModule;
import projects.gemoma.Tools;

/**
 * Re-computes BUSCO statistics on gene level from a BUSCO full table that was
 * produced on transcript/protein level.
 *
 * <p>Rows of the BUSCO table are grouped by BUSCO id. Transcripts are mapped to
 * genes using the IDs table, so that several hits of one BUSCO to transcripts of
 * the same gene count as a single complete copy instead of a duplication.
 * Optionally, genes are additionally partitioned into subgenomes by matching
 * the fifth column of the IDs table against user-supplied regular expressions;
 * statistics are then reported per subgenome.
 */
public class BUSCORecomputer extends GeMoMaModule {
    /** Label used in all outputs for genes that match none of the subgenome regular expressions. */
    public static String rem = "<REMAINING>";

    /**
     * Parses the IDs table and the BUSCO full table, writes a gene-level table
     * to a temporary file and appends the summary statistics to the protocol.
     *
     * <p>Columns read from the IDs table: 0 = gene id, 1 = transcript id and,
     * only if subgenome regular expressions are given, 4 = the value matched
     * against these expressions. Columns read from the BUSCO table: 0 = BUSCO
     * id, 1 = status, 2 = sequence (transcript) id. Leading lines starting with
     * {@code #} are treated as header in both files; blank lines are skipped.
     *
     * @param parameters the tool parameters ("BUSCO", "IDs", "subgenomes")
     * @param protocol   receives warnings and the summary statistics
     * @param progress   not used
     * @param threads    not used
     * @param tempD      directory handed to {@code Tools.createTempFile} for the result file
     * @return a result wrapping the parsed, gene-level BUSCO table
     * @throws IllegalArgumentException if a value matches more than one regular expression,
     *         a row has too few columns, a BUSCO status is unknown, or a transcript that is
     *         missing in the IDs table would have to be assigned to a subgenome
     * @throws Exception on I/O problems
     */
    @Override
    public ToolResult run(ToolParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads, String tempD) throws Exception {
        // one regular expression per user-defined subgenome; none means "single genome"
        ExpandableParameterSet eps = (ExpandableParameterSet)parameters.getParameterForName("subgenomes").getValue();
        int poly = eps.getNumberOfParameters();
        String[] regex = new String[poly];
        for (int i = 0; i < poly; i++) {
            regex[i] = (String)((SimpleParameterSet)eps.getParameterAt(i).getValue()).getParameterAt(0).getValue();
        }
        if (poly == 0) {
            poly = 1;
        }

        FileParameter fp = (FileParameter)parameters.getParameterForName("BUSCO");
        String busco = fp.getValue();
        fp = (FileParameter)parameters.getParameterForName("IDs");
        String genTranscript = fp.getValue();

        // --- IDs table: transcript -> gene and gene -> subgenome index ---
        HashMap<String, String> trans2gene = new HashMap<String, String>();
        HashMap<String, Integer> gene2subgenome = new HashMap<String, Integer>();
        // one extra slot for the <REMAINING> bucket that may be added below
        int[] num = new int[poly + 1];
        try (BufferedReader r = new BufferedReader(new FileReader(genTranscript))) {
            String line;
            // skip the header; a null line means the file has no data rows at all
            do {
                line = r.readLine();
            } while (line != null && line.startsWith("#"));
            for (; line != null; line = r.readLine()) {
                if (line.isEmpty()) {
                    continue;
                }
                String[] split = line.split("\t");
                if (split.length < 2) {
                    throw new IllegalArgumentException("The IDs table needs at least two tab-separated columns (gene ID, transcript ID), but found: " + line);
                }
                trans2gene.put(split[1], split[0]);
                if (gene2subgenome.containsKey(split[0])) {
                    // gene already assigned by an earlier transcript
                    continue;
                }
                if (regex.length > 0 && split.length < 5) {
                    throw new IllegalArgumentException("Subgenome assignment needs the fifth column of the IDs table, but found only " + split.length + " column(s): " + line);
                }
                int match = -1;
                for (int j = 0; j < regex.length; j++) {
                    if (split[4].matches(regex[j])) {
                        if (match >= 0) {
                            throw new IllegalArgumentException(split[4] + " matches multiple regular expressions: " + regex[match] + " and " + regex[j]);
                        }
                        match = j;
                    }
                }
                if (match < 0) {
                    // no expression matched: use the bucket after the last regex and
                    // make sure it is reported (only relevant if regexes were given)
                    match = regex.length;
                    if (poly == regex.length) {
                        ++poly;
                    }
                }
                gene2subgenome.put(split[0], match);
                num[match]++;
            }
        }

        if (poly > 1) {
            // NOTE(review): the counts are incremented once per distinct gene ID,
            // although the column header says "#transcripts".
            protocol.append("subgenome\t#transcripts\n");
            for (int c = 0; c < poly; c++) {
                protocol.append((c < regex.length ? regex[c] : (regex.length == 0 ? "" : rem)) + "\t" + num[c] + "\n");
            }
            protocol.append("\n");
        }

        // --- BUSCO table ---
        // stat[subgenome] = { complete single-copy, duplicated, fragmented, missing }
        int[][] stat = new int[poly][4];
        double all = 0.0;
        // per subgenome: gene -> transcripts hit by the BUSCO group currently being collected
        @SuppressWarnings("unchecked")
        HashMap<String, ArrayList<String>>[] hash = new HashMap[poly];
        for (int c = 0; c < poly; c++) {
            hash[c] = new HashMap<String, ArrayList<String>>();
        }
        File out = Tools.createTempFile("BUSCO-full-table-parsed", tempD);
        try (BufferedReader r = new BufferedReader(new FileReader(busco));
                BufferedWriter w = new BufferedWriter(new FileWriter(out))) {
            String line;
            // copy the first two header lines of the original table, drop the rest
            int anz = 0;
            while ((line = r.readLine()) != null && line.startsWith("#")) {
                if (anz < 2) {
                    w.append(line + "\n");
                }
                ++anz;
            }
            w.append("# BUSCORecomputer");
            if (regex.length > 0) {
                for (int i = 0; i < poly; i++) {
                    w.append("\t" + (i < regex.length ? regex[i] : rem) + "\t\t");
                }
            }
            w.newLine();
            w.append("# Busco id");
            for (int i = 0; i < poly; i++) {
                w.append("\tstatus\tgene(s)\ttranscript(s)");
            }
            w.newLine();

            // BUSCO id of the "Duplicated" group that is currently collected in hash
            String old = null;
            for (; line != null; line = r.readLine()) {
                if (line.isEmpty()) {
                    continue;
                }
                String[] split = line.split("\t");
                if (split.length < 2) {
                    throw new IllegalArgumentException("The BUSCO table needs at least two tab-separated columns (BUSCO id, status), but found: " + line);
                }
                if (old != null && !split[0].equals(old)) {
                    // a new BUSCO id starts: flush the collected duplicated group
                    BUSCORecomputer.add(old, stat, hash, w);
                    all += 1.0;
                    old = null;
                }
                int v = BUSCORecomputer.getIndex(split[1]);
                if (v == 3) {
                    // missing on transcript level is missing everywhere; counted later as remainder
                    w.append(split[0]);
                    for (int c = 0; c < poly; c++) {
                        w.append("\t" + split[1] + "\t" + "\t");
                    }
                    w.newLine();
                    all += 1.0;
                    continue;
                }
                if (split.length < 3) {
                    throw new IllegalArgumentException("Missing sequence id (third column) in BUSCO table row: " + line);
                }
                String gene = trans2gene.get(split[2]);
                if (gene == null) {
                    // fall back to the transcript id as gene id
                    gene = split[2];
                    protocol.append("Warning no gene found for transcript: " + gene + "\n");
                }
                Integer assigned = gene2subgenome.get(gene);
                if (assigned == null) {
                    if (regex.length > 0) {
                        throw new IllegalArgumentException("Cannot assign transcript " + split[2] + " to a subgenome, since it is not contained in the IDs table.");
                    }
                    // without subgenomes every gene belongs to the single bucket 0
                    assigned = 0;
                }
                int sub = assigned;
                if (v >= 0) {
                    // Complete or Fragmented: exactly one row for this BUSCO id
                    stat[sub][v]++;
                    w.append(split[0]);
                    for (int c = 0; c < poly; c++) {
                        if (c == sub) {
                            w.append("\t" + split[1] + "\t" + gene + "\t" + split[2]);
                        } else {
                            w.append("\tMissing\t\t");
                        }
                    }
                    w.newLine();
                    all += 1.0;
                    continue;
                }
                // Duplicated: collect all rows of this BUSCO id and decide per gene in add(..)
                if (old == null) {
                    old = split[0];
                }
                ArrayList<String> list = hash[sub].get(gene);
                if (list == null) {
                    list = new ArrayList<String>();
                    hash[sub].put(gene, list);
                }
                list.add(split[2]);
            }
            if (old != null) {
                BUSCORecomputer.add(old, stat, hash, w);
                all += 1.0;
            }
        }

        // --- summary ---
        NumberFormat nf = NumberFormat.getInstance(Locale.US);
        nf.setMaximumFractionDigits(1);
        int total = (int)all;
        for (int c = 0; c < poly; c++) {
            // everything that is neither complete, duplicated nor fragmented is missing
            stat[c][3] = total - (stat[c][0] + stat[c][1] + stat[c][2]);
            protocol.append(c < regex.length ? regex[c] + "\t" : (regex.length == 0 ? "" : rem + "\t"));
            protocol.append("C:" + nf.format((double)(stat[c][0] + stat[c][1]) / all * 100.0));
            protocol.append("%[S:" + nf.format((double)stat[c][0] / all * 100.0));
            protocol.append("%,D:" + nf.format((double)stat[c][1] / all * 100.0));
            protocol.append("%],F:" + nf.format((double)stat[c][2] / all * 100.0));
            protocol.append("%,M:" + nf.format((double)stat[c][3] / all * 100.0));
            protocol.append("%,n:" + total + "\n");
        }
        protocol.append("\n");
        if (regex.length > 0) {
            for (int c = 0; c < stat.length; c++) {
                protocol.append("\t" + (c < regex.length ? regex[c] : rem) + "\t");
            }
            protocol.append("\n");
        }
        protocol.append(BUSCORecomputer.get(stat, all, nf, "Complete BUSCOs (C)", 0, 1));
        protocol.append(BUSCORecomputer.get(stat, all, nf, "Complete and single-copy BUSCOs (S)", 0));
        protocol.append(BUSCORecomputer.get(stat, all, nf, "Complete and duplicated BUSCOs (D)", 1));
        protocol.append(BUSCORecomputer.get(stat, all, nf, "Fragmented BUSCOs (F)", 2));
        protocol.append(BUSCORecomputer.get(stat, all, nf, "Missing BUSCOs (M)", 3));
        protocol.append("\nTotal BUSCO groups searched\t" + total + "\n");
        return new ToolResult("", "", null, new ResultSet(new TextResult("BUSCO parsed full table", "Result", new FileParameter.FileRepresentation(out.getAbsolutePath()), "tabular", this.getToolName(), null, true)), parameters, this.getToolName(), new Date());
    }

    /**
     * Writes one output row for a BUSCO group that was reported as "Duplicated"
     * on transcript level and updates the statistics.
     *
     * <p>For each subgenome the number of distinct genes decides the status:
     * none is "Missing", one is "Complete", more than one is "Duplicated".
     * "Missing" is only written, not counted in {@code stat}. The maps of
     * subgenomes with at least one gene are cleared afterwards so they can be
     * reused for the next group.
     *
     * @param busco the BUSCO id
     * @param stat  per subgenome counters; index 0 (complete) or 1 (duplicated) is incremented
     * @param hash  per subgenome a map from gene id to the transcripts hit by this BUSCO
     * @param w     the writer of the output table
     * @throws IOException if writing fails
     */
    static void add(String busco, int[][] stat, HashMap<String, ArrayList<String>>[] hash, BufferedWriter w) throws IOException {
        w.append(busco);
        for (int c = 0; c < hash.length; c++) {
            HashMap<String, ArrayList<String>> genes = hash[c];
            if (genes.isEmpty()) {
                w.append("\tMissing\t\t");
                continue;
            }
            int idx = genes.size() == 1 ? 0 : 1;
            w.append(idx == 0 ? "\tComplete" : "\tDuplicated");

            // genes are separated by ", " (Arrays.toString without the brackets)
            String[] keys = genes.keySet().toArray(new String[0]);
            String geneList = Arrays.toString(keys);
            w.append("\t" + geneList.substring(1, geneList.length() - 1) + "\t");

            // transcripts of all genes, in the same gene order, separated by ","
            boolean first = true;
            for (String key : keys) {
                for (String transcript : genes.get(key)) {
                    w.append((first ? "" : ",") + transcript);
                    first = false;
                }
            }
            stat[c][idx]++;
            genes.clear();
        }
        w.newLine();
    }

    /**
     * Builds one line of the summary table.
     *
     * @param stat  per subgenome counters
     * @param all   the total number of BUSCO groups (denominator of the percentage)
     * @param nf    the format used for the percentage
     * @param info  the row label
     * @param index the counter indices that are summed up for this row
     * @return the label followed by count and percentage for each subgenome, terminated by a newline
     */
    static String get(int[][] stat, double all, NumberFormat nf, String info, int ... index) {
        StringBuilder sb = new StringBuilder(info);
        for (int[] counts : stat) {
            int sum = 0;
            for (int idx : index) {
                sum += counts[idx];
            }
            sb.append(BUSCORecomputer.get(all, sum, nf));
        }
        sb.append("\n");
        return sb.toString();
    }

    /**
     * Formats a count together with its percentage of {@code all}.
     *
     * @param all the denominator
     * @param anz the count
     * @param nf  the format used for the percentage
     * @return {@code "\t<anz>\t<percentage>%"}
     */
    static String get(double all, int anz, NumberFormat nf) {
        return "\t" + anz + "\t" + nf.format((double)anz / all * 100.0) + "%";
    }

    /**
     * Maps a BUSCO status to the index of its counter.
     *
     * @param value the status as given in the BUSCO full table
     * @return 0 for "Complete", 2 for "Fragmented", 3 for "Missing", and -1 for
     *         "Duplicated", which has no fixed counter because it is re-evaluated
     *         on gene level in {@link #add}
     * @throws IllegalArgumentException for any other status
     */
    static int getIndex(String value) {
        switch (value) {
            case "Complete":
                return 0;
            case "Duplicated":
                return -1;
            case "Fragmented":
                return 2;
            case "Missing":
                return 3;
            default:
                throw new IllegalArgumentException(value);
        }
    }

    /**
     * Creates the parameter set of this tool: the BUSCO full table, the IDs
     * table and an expandable (initially empty) list of subgenome regular
     * expressions.
     *
     * @throws RuntimeException wrapping any exception raised while creating the parameters
     */
    @Override
    public ToolParameterSet getToolParameters() {
        try {
            return new ToolParameterSet(this.getToolName(),
                    new FileParameter("BUSCO", "the BUSCO full table based on transcripts/proteins", "tabular", true, new FileExistsValidator()),
                    new FileParameter("IDs", "a table with at least two columns, the first is the gene ID, the second is the transcript/protein ID. The assignment file from the Extractor can be used or a table can be derived by the user from the gene annotation file (gff,gtf)", "tabular", true, new FileExistsValidator()),
                    new ParameterSetContainer("subgenomes", "",
                            new ExpandableParameterSet(
                                    new SimpleParameterSet(new SimpleParameter(DataType.STRING, "subgenome", "regex for contigs/chromosomes of this subgenome", true)),
                                    "subgenomes", "regular expression for subgenome contigs/chromsome names", 0)));
        }
        catch (Exception e) {
            // keep the cause so that the actual problem is visible to the caller
            throw new RuntimeException(e);
        }
    }

    /** Returns the simple class name as tool name. */
    @Override
    public String getToolName() {
        return this.getClass().getSimpleName();
    }

    /** Returns the same value as {@link #getToolName()}. */
    @Override
    public String getShortName() {
        return this.getToolName();
    }

    /** Returns a one-line description of the tool. */
    @Override
    public String getDescription() {
        return "recomputes BUSCO statistic for genes";
    }

    /** Returns the help text of this tool followed by the inherited {@code MORE} text. */
    @Override
    public String getHelpText() {
        return "This tool can be used to compute BUSCO statistics for genes instead of transcripts. Proteins of an annotation file can be extracted with **Extractor**, Proteins can be used to compute BUSCO statistics with BUSCO. The full BUSCO table and the assignment file from the **Extractor** can be used as input for this tool. Alternatively, a table can be generated from the annotation file that can be used instead of the assignment file." + MORE;
    }

    /** Declares the single tabular result "BUSCO parsed full table". */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return new JstacsTool.ResultEntry[]{new JstacsTool.ResultEntry(TextResult.class, "tabular", "BUSCO parsed full table")};
    }

    /** Returns {@code null}; no test cases are provided for this tool. */
    @Override
    public ToolResult[] getTestCases(String path) {
        return null;
    }
}

