/*
 * Decompiled with CFR 0.152.
 */
package projects.xanthogenomes.tools;

import de.jstacs.DataType;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.WrongAlphabetException;
import de.jstacs.data.alphabets.Alphabet;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.AbstractStringExtractor;
import de.jstacs.io.FileManager;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.ListResult;
import de.jstacs.results.NumericalResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.ResultSetResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.utils.ComparableElement;
import de.jstacs.utils.IntList;
import de.jstacs.utils.Pair;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import projects.talen.InfixMatchFinder;
import projects.talen.MatchFinder;
import projects.tals.ScanForTBSCLI;
import projects.tals.TALgetterDiffSM;
import projects.tals.TBSScanner;
import projects.xanthogenomes.RVDAlphabetContainer;
import projects.xanthogenomes.TALE;
import projects.xanthogenomes.TALEFamilyBuilder;
import projects.xanthogenomes.tools.ClassBuilderTool;
import projects.xanthogenomes.tools.TALEPredictionTool;

/**
 * Jstacs tool that predicts target sites of TALEs in a set of input sequences
 * using a TALgetter model and, if more than one TALE is processed together,
 * reports which input sequences are predicted targets of several TALEs.
 *
 * <p>TALEs are either read from a FastA file or taken from all classes of a
 * {@link TALEFamilyBuilder} definition; in the latter case one result per class
 * is produced.
 */
public class PredictAndIntersectTargetsTool
implements JstacsTool {
    /** Maximum number of predicted target sites requested and reported per TALE. */
    private static final int MAX_PREDICTIONS = 500;

    /**
     * Builds the parameter set of this tool: the input sequences to scan (index 0)
     * and a selection (index 1) between TALEs given as FastA and TALEs given by a
     * class builder.
     *
     * @return the tool parameters, or {@code null} if the selection parameter
     *         could not be created (the exception is printed to standard error)
     */
    @Override
    public ToolParameterSet getToolParameters() {
        FileParameter genome = new FileParameter("Input sequences", "Sequences, e.g., promoters, to scan for TALE target sites", "fasta,fa,fas", true);
        FileParameter builderFile = new FileParameter("Class builder", "TALE class builder definition", "xml", true);
        builderFile.setExtendedType(TALEFamilyBuilder.class.getName());
        FileParameter tales = new FileParameter("TALE sequences", "TALE sequences, either as complete DNA or AS sequences (e.g., output of TALE Prediction) or as RVD sequences.", "fasta,fa,fas", true);
        SelectionParameter selPar = null;
        try {
            selPar = new SelectionParameter(DataType.PARAMETERSET, new String[]{"TALEs in FastA", "TALEs in class builder"}, new ParameterSet[]{new SimpleParameterSet(tales), new SimpleParameterSet(builderFile)}, null, "Predictions for", "Predict and intersect targets for all TALEs in a given FastA input file or for all TALEs in all classes defined by a class builder.", true);
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        return new ToolParameterSet(this.getShortName(), genome, selPar);
    }

    /**
     * Runs the prediction.
     *
     * <p>Steps: (1) read and preprocess the input sequences, (2) collect the RVD
     * sequences of all TALEs as FastA text (one buffer per class, or a single
     * buffer in FastA mode), (3) extend the TALgetter model by RVDs it does not
     * know yet, (4) predict target sites per TALE and intersect them per group.
     *
     * @param parameters parameters as created by {@link #getToolParameters()}
     * @param protocol   receives progress messages and warnings
     * @param progress   receives progress values between 0 and 1
     * @param threads    not used by this implementation
     * @return the tool result holding one list of predictions per TALE and, for
     *         groups of more than one TALE, a list of overlapping targets
     * @throws Exception if reading the inputs, loading the model or scanning fails
     */
    @Override
    public ToolResult run(ToolParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads) throws Exception {
        progress.setLast(1.0);
        progress.setCurrent(0.0);
        FileParameter fp = (FileParameter)parameters.getParameterAt(0);
        FileParameter.FileRepresentation fr = fp.getFileContents();
        protocol.append("Reading input data...\n");
        DataSet ds = new DataSet(new AlphabetContainer((Alphabet)new DiscreteAlphabet(true, "A", "C", "G", "T", "N", "W", "S", "M", "K", "R", "Y", "B", "D", "H", "V")), new SparseStringExtractor(new StringReader(fr.getContent()), '>', "", (SequenceAnnotationParser)new SimpleSequenceAnnotationParser()));
        Pair<int[][], DataSet> pair = TBSScanner.preprocess(ds);
        int[][] offsets = pair.getFirstElement();
        ds = pair.getSecondElement();
        progress.setCurrent(0.1);
        protocol.append("...finished.\n\n");

        SelectionParameter selPar = (SelectionParameter)parameters.getParameterAt(1);
        StringBuffer[] sbs = null;
        String[] classNames = null;
        protocol.append("Collecting TALE RVD sequences...\n");
        // Parsed TALEs per group; stays null if the FastA input could only be used as raw RVD text.
        TALE[][] allTales = null;
        if (selPar.getSelected() == 0) {
            StringBuffer sb = null;
            try {
                TALE[] tales = ClassBuilderTool.readProteinTALEs(((FileParameter)((ParameterSet)selPar.getValue()).getParameterAt(0)).getFileContents(), protocol);
                tales = this.filterTALEsByLength(tales, protocol);
                sb = new StringBuffer();
                for (TALE tale : tales) {
                    sb.append(">" + tale.getId() + "\n");
                    Sequence rvds = tale.getRvdSequence();
                    sb.append(rvds.toString("-", 0, rvds.getLength()) + "\n");
                }
                allTales = new TALE[][]{tales};
            }
            catch (Exception e) {
                // Deliberate best-effort fallback: input that cannot be parsed as
                // DNA/AS TALE sequences is passed on unchanged and interpreted as
                // RVD sequences further below.
                sb = new StringBuffer();
                sb.append(((FileParameter)((ParameterSet)selPar.getValue()).getParameterAt(0)).getFileContents().getContent());
            }
            sbs = new StringBuffer[]{sb};
        } else {
            TALEFamilyBuilder builder = new TALEFamilyBuilder(new StringBuffer(((FileParameter)((ParameterSet)selPar.getValue()).getParameterAt(0)).getFileContents().getContent()));
            Object[] fams = builder.getFamilies();
            Arrays.sort(fams);
            classNames = new String[fams.length];
            sbs = new StringBuffer[fams.length];
            allTales = new TALE[fams.length][];
            for (int j = 0; j < fams.length; ++j) {
                TALEFamilyBuilder.TALEFamily family = (TALEFamilyBuilder.TALEFamily)fams[j];
                classNames[j] = family.getFamilyId();
                TALE[] tales = this.filterTALEsByLength(family.getFamilyMembers(), protocol);
                allTales[j] = tales;
                StringBuffer sb = new StringBuffer();
                for (TALE tale : tales) {
                    sb.append(">" + tale.getId() + "\n");
                    Sequence rvds = tale.getRvdSequence();
                    sb.append(rvds.toString("-", 0, rvds.getLength()) + "\n");
                }
                sbs[j] = sb;
            }
        }

        TALgetterDiffSM model = (TALgetterDiffSM)XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSCLI.class.getClassLoader().getResourceAsStream("projects/xanthogenomes/talfinder_obg2_hyp_bg.xml")), "model");
        DiscreteAlphabet modelAlph = (DiscreteAlphabet)model.getRVDAlphabet().getAlphabetAt(0);

        // First pass over all TALEs: count them (for progress reporting) and
        // collect RVD symbols that the model alphabet does not contain.
        HashSet<String> newSyms = new HashSet<String>();
        double totNum = 0.0;
        for (int t = 0; t < sbs.length; ++t) {
            DataSet tals = new DataSet((AlphabetContainer)RVDAlphabetContainer.SINGLETON, (AbstractStringExtractor)new SparseStringExtractor(new StringReader(sbs[t].toString()), '>', "", (SequenceAnnotationParser)new SimpleSequenceAnnotationParser()), "-");
            for (int i = 0; i < tals.getNumberOfElements(); ++i) {
                totNum += 1.0;
                Sequence rvds = tals.getElementAt(i);
                for (int j = 0; j < rvds.getLength(); ++j) {
                    String sym = rvds.toString(j, j + 1);
                    if (!modelAlph.isSymbol(sym)) {
                        newSyms.add(sym);
                    }
                }
            }
        }
        // Unknown RVDs are added to the model with a uniform specificity over the four values.
        String[] ns = newSyms.toArray(new String[0]);
        double[][] specs = new double[ns.length][];
        for (int i = 0; i < ns.length; ++i) {
            specs[i] = new double[]{0.25, 0.25, 0.25, 0.25};
        }
        model.addAndSet(ns, specs, null);
        model.fix();
        progress.setCurrent(0.2);
        protocol.append("...finished.\n\n");

        // The remaining 80% of the progress bar are distributed evenly over all TALEs.
        double fac = 1.0 / totNum * 0.8;
        totNum = 0.0;
        Result[] res = new Result[sbs.length];
        ResultSet set = null;
        protocol.append("Predicting targets for\n");
        for (int s = 0; s < sbs.length; ++s) {
            DataSet tals = new DataSet(model.getRVDAlphabet(), (AbstractStringExtractor)new SparseStringExtractor(new StringReader(sbs[s].toString()), '>', "", (SequenceAnnotationParser)new SimpleSequenceAnnotationParser()), "-");
            int numTals = tals.getNumberOfElements();
            MatchFinder.Match[][] allMatches = new MatchFinder.Match[numTals][];
            String[] talNames = new String[numTals];
            // With more than one TALE, slot 0 is reserved for the intersection result.
            int off = numTals > 1 ? 1 : 0;
            ListResult[] lires = new ListResult[numTals + off];
            for (int t2 = 0; t2 < numTals; ++t2) {
                Sequence tal = tals.getElementAt(t2);
                talNames[t2] = (String)tal.getSequenceAnnotationByType("unparsed comment line", 0).getResultForName("unparsed comment").getValue();
                protocol.append(talNames[t2] + "\n");
                ComparableElement<MatchFinder.Match, Double>[] list = PredictAndIntersectTargetsTool.predict(model, tal, MAX_PREDICTIONS, ds, offsets);
                if (allTales != null && allTales[s][t2].containsAberrantRepeat()) {
                    // Additionally scan with the RVD sequence lacking the aberrant
                    // repeats and merge both prediction lists.
                    Sequence tal2 = this.removeAbberant(tal, allTales[s][t2]);
                    if (tal2.getLength() > 3) {
                        ComparableElement<MatchFinder.Match, Double>[] list2 = PredictAndIntersectTargetsTool.predict(model, tal2, MAX_PREDICTIONS, ds, offsets);
                        list = PredictAndIntersectTargetsTool.join(tal, tal2, list, list2, MAX_PREDICTIONS);
                    }
                }
                allMatches[t2] = new MatchFinder.Match[list.length];
                ResultSet[] re = new ResultSet[list.length];
                for (int i4 = 0; i4 < list.length; ++i4) {
                    // The list is traversed from its end so that index 0 of the
                    // output holds the last list element.
                    ComparableElement<MatchFinder.Match, Double> hit = list[list.length - 1 - i4];
                    MatchFinder.Match m = hit.getElement();
                    allMatches[t2][i4] = m;
                    double score = hit.getWeight();
                    int seqIdx = m.getSeqIdx();
                    int pos = m.getSeqPos() + offsets[0][seqIdx];
                    String id = ds.getElementAt(seqIdx).getSequenceAnnotationByType("unparsed comment line", 0).getResultForName("unparsed comment").getValue().toString().trim();
                    // Matches coming from the reduced RVD sequence carry that sequence; they are marked with "-a".
                    Sequence currTal = m.getTal() == null ? tal : m.getTal();
                    Sequence ts = ds.getElementAt(seqIdx).getSubSequence(m.getSeqPos(), currTal.getLength() + 1);
                    re[i4] = new ResultSet(new Result[][]{{new CategoricalResult("Sequence ID", "", id), new NumericalResult("Position", "", pos), new NumericalResult("Score", "", score), new CategoricalResult("Site", "", ts.toString()), new CategoricalResult("Match string", "", String.valueOf(model.getMatchString(currTal, ts)) + (m.getTal() == null ? "" : "-a"))}});
                }
                lires[t2 + off] = new ListResult("Predictions for " + talNames[t2], "TALgetter predictions of target sites for " + talNames[t2], null, re);
                // Count the finished TALE before reporting, so progress does not lag one TALE behind.
                totNum += 1.0;
                progress.setCurrent(0.2 + fac * totNum);
            }
            if (off > 0) {
                lires[0] = this.intersect(ds, offsets, talNames, allMatches);
            }
            if (sbs.length == 1) {
                set = new ResultSet(new Result[][]{lires});
            } else {
                res[s] = new ResultSetResult("Predictions for class " + classNames[s], "Predicted target sites for all TALEs in class " + classNames[s], null, new ResultSet(new Result[][]{lires}));
            }
        }
        if (set == null) {
            set = new ResultSet(new Result[][]{res});
        }
        progress.setCurrent(1.0);
        return new ToolResult("Result of " + this.getToolName(), this.getToolName() + " on \"" + fr.getFilename() + "\"", null, set, parameters, this.getToolName(), new Date(System.currentTimeMillis()));
    }

    /**
     * Keeps only TALEs with more than three repeats and writes a warning to the
     * protocol for every TALE that is dropped.
     *
     * @param tales    the TALEs to filter
     * @param protocol receives one warning per ignored TALE
     * @return the retained TALEs in their original order
     */
    private TALE[] filterTALEsByLength(TALE[] tales, Protocol protocol) {
        ArrayList<TALE> kept = new ArrayList<TALE>();
        for (TALE tale : tales) {
            if (tale.getNumberOfRepeats() > 3) {
                kept.add(tale);
            } else {
                protocol.appendWarning("TALE " + tale.getId() + " ignored as it has less than 4 repeats.\n");
            }
        }
        return kept.toArray(new TALE[0]);
    }

    /**
     * Merges the predictions of a full RVD sequence ({@code list}) with those of
     * its reduced variant ({@code list2}) into one list of at most {@code cap}
     * entries.
     *
     * <p>Both inputs are consumed from their last element towards the first and
     * the result is filled from its last slot towards the first, so the highest
     * weights end up at the end of the result. Weights of {@code list2} are
     * shifted by {@code (len(tal) - len(tal2)) * log(0.25)} before comparison and
     * before being stored; matches taken from {@code list2} get {@code tal2} set
     * as their TALE.
     *
     * <p>The result has {@code min(cap, list.length + list2.length)} entries, so
     * short or empty input lists are handled without reading past their start.
     *
     * @param tal   the full RVD sequence
     * @param tal2  the reduced RVD sequence
     * @param list  predictions for {@code tal}
     * @param list2 predictions for {@code tal2}
     * @param cap   maximum number of entries in the merged list
     * @return the merged list
     */
    @SuppressWarnings("unchecked")
    private static ComparableElement<MatchFinder.Match, Double>[] join(Sequence tal, Sequence tal2, ComparableElement<MatchFinder.Match, Double>[] list, ComparableElement<MatchFinder.Match, Double>[] list2, int cap) {
        double correct = (double)(tal.getLength() - tal2.getLength()) * Math.log(0.25);
        int size = Math.min(cap, list.length + list2.length);
        // Generic array creation is not possible; the raw array only ever receives
        // ComparableElement<Match, Double> instances below.
        ComparableElement<MatchFinder.Match, Double>[] merged = new ComparableElement[size];
        int i = list.length - 1;
        int j = list2.length - 1;
        for (int k = merged.length - 1; k >= 0; --k) {
            boolean takeFromFull;
            if (j < 0) {
                // Reduced list exhausted.
                takeFromFull = true;
            } else if (i < 0) {
                // Full list exhausted.
                takeFromFull = false;
            } else {
                takeFromFull = list[i].getWeight() > list2[j].getWeight() + correct;
            }
            if (takeFromFull) {
                merged[k] = list[i];
                --i;
            } else {
                MatchFinder.Match match = list2[j].getElement();
                match.setTal(tal2);
                merged[k] = new ComparableElement<MatchFinder.Match, Double>(match, list2[j].getWeight() + correct);
                --j;
            }
        }
        return merged;
    }

    /**
     * Creates a copy of an RVD sequence that contains only the RVDs of repeats of
     * type {@code UNKNOWN} or {@code NORMAL}; the last repeat is always kept.
     *
     * <p>NOTE(review): assumes that {@code tal} has at least as many RVDs as
     * {@code tale} has repeats and that both are in the same order.
     *
     * @param tal  the RVD sequence
     * @param tale the TALE providing the repeat types
     * @return the reduced RVD sequence over the alphabet of {@code tal}
     * @throws IllegalArgumentException if the sequence cannot be created
     * @throws WrongAlphabetException   if the reduced string does not fit the alphabet
     */
    private Sequence removeAbberant(Sequence tal, TALE tale) throws IllegalArgumentException, WrongAlphabetException {
        String[] parts = tal.toString("-", 0, tal.getLength()).split("-");
        StringBuffer reduced = new StringBuffer();
        int numRepeats = tale.getNumberOfRepeats();
        for (int i = 0; i < numRepeats; ++i) {
            boolean regular = tale.getRepeat(i).getType() == TALE.Type.UNKNOWN || tale.getRepeat(i).getType() == TALE.Type.NORMAL;
            if (regular || i == numRepeats - 1) {
                reduced.append(parts[i]);
                reduced.append("-");
            }
        }
        if (reduced.length() > 0) {
            // Drop the trailing delimiter.
            reduced.setLength(reduced.length() - 1);
        }
        return Sequence.create(tal.getAlphabetContainer(), reduced.toString(), "-");
    }

    /**
     * Scans the data set for target sites of one RVD sequence.
     *
     * <p>The score threshold is derived from the best possible score per position
     * plus {@code log(rat)}. Starting at {@code rat = 0.5}, the scan is repeated
     * with {@code rat} divided by 1.5 until at least {@code cap} hits are found
     * or {@code rat} is no longer greater than 0.1; the result may therefore hold
     * fewer than {@code cap} entries.
     *
     * @param model   the TALgetter model
     * @param tal     the RVD sequence
     * @param cap     the number of hits requested
     * @param ds      the sequences to scan
     * @param offsets not used by this method
     * @return the sorted list of hits of the last scan
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static ComparableElement<MatchFinder.Match, Double>[] predict(TALgetterDiffSM model, Sequence tal, int cap, DataSet ds, int[][] offsets) {
        InfixMatchFinder singleFind = new InfixMatchFinder(null, Math.min(8, tal.getLength()), model);
        ComparableElement[] list = new ComparableElement[]{};
        double scoredLength = (double)(tal.getLength() + 1);
        double bestRelScore = model.getBestPossibleScore(tal, null) / scoredLength;
        double rat = 0.5;
        while (list.length < cap && rat > 0.1) {
            double singleThresh = bestRelScore + Math.log(rat);
            singleFind.getPreps(tal, singleThresh * scoredLength);
            singleFind.setDataSet(ds);
            list = singleFind.getScoresAbove(tal, singleThresh * scoredLength, cap, true, false).getSortedList();
            rat /= 1.5;
        }
        return list;
    }

    /**
     * Groups the matches of all TALEs by the ID of the input sequence they lie in
     * and lists, per sequence, the (rank, position) pairs of every TALE.
     *
     * <p>Sequences are ordered by the number of TALEs with at least one match
     * (descending) and then by rank product (ascending).
     *
     * @param ds         the scanned sequences
     * @param offsets    offsets added to the match positions ({@code offsets[0][seqIdx]})
     * @param talNames   names of the TALEs, used as column names
     * @param allMatches matches per TALE; the index within a TALE's array plus one is used as rank
     * @return the list of overlapping target sequences
     */
    private ListResult intersect(DataSet ds, int[][] offsets, String[] talNames, MatchFinder.Match[][] allMatches) {
        // Sequence ID -> list of {TALE index, position, rank}.
        HashMap<String, LinkedList<int[]>> hitsById = new HashMap<String, LinkedList<int[]>>();
        for (int i = 0; i < allMatches.length; ++i) {
            for (int j = 0; j < allMatches[i].length; ++j) {
                MatchFinder.Match m = allMatches[i][j];
                int seqIdx = m.getSeqIdx();
                int pos = m.getSeqPos() + offsets[0][seqIdx];
                String id = ds.getElementAt(seqIdx).getSequenceAnnotationByType("unparsed comment line", 0).getResultForName("unparsed comment").getValue().toString().trim();
                LinkedList<int[]> hits = hitsById.get(id);
                if (hits == null) {
                    hits = new LinkedList<int[]>();
                    hitsById.put(id, hits);
                }
                hits.add(new int[]{i, pos, j + 1});
            }
        }

        Inter[] res = new Inter[hitsById.size()];
        int k = 0;
        for (String id : hitsById.keySet()) {
            Inter inter = new Inter(id, allMatches.length);
            for (int[] hit : hitsById.get(id)) {
                inter.add(hit[0], hit[2], hit[1]);
            }
            res[k++] = inter;
        }
        Arrays.sort(res, new Comparator<Inter>(){

            @Override
            public int compare(Inter o1, Inter o2) {
                // More TALEs hitting the sequence first ...
                int c = Integer.compare(o2.getNum(), o1.getNum());
                if (c != 0) {
                    return c;
                }
                // ... then the smaller rank product.
                return Double.compare(o1.getRankProduct(), o2.getRankProduct());
            }
        });

        ResultSet[] rows = new ResultSet[res.length];
        for (int r = 0; r < res.length; ++r) {
            int len = res[r].getLength();
            Result[] rs = new Result[len + 2];
            rs[0] = new CategoricalResult("Sequence ID", "", res[r].id);
            rs[1] = new NumericalResult("Intersection size", "", res[r].getNum());
            for (int j = 0; j < len; ++j) {
                rs[j + 2] = new CategoricalResult(talNames[j], "", res[r].getString(j));
            }
            rows[r] = new ResultSet(new Result[][]{rs});
        }
        return new ListResult("Overlapping target sites", "Overlapping target sites between TALEs according to TALgetter predictions", null, rows);
    }

    /** @return the display name of this tool */
    @Override
    public String getToolName() {
        return "Predict and Intersect Targets";
    }

    /** @return the short name of this tool, also used as name of its parameter set */
    @Override
    public String getShortName() {
        return "targets";
    }

    /** @return a one-sentence description of this tool */
    @Override
    public String getDescription() {
        return "Predicts target sites using TALgetter and intersects targets between different TALEs";
    }

    /**
     * Loads the help text from the class path resource
     * {@code projects/xanthogenomes/tools/PredictAndIntersectTargetsTool.txt}.
     *
     * @return the help text, or an empty string if reading fails with an
     *         {@link IOException} (which is printed to standard error)
     */
    @Override
    public String getHelpText() {
        try {
            return FileManager.readInputStream(TALEPredictionTool.class.getClassLoader().getResourceAsStream("projects/xanthogenomes/tools/PredictAndIntersectTargetsTool.txt")).toString();
        }
        catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /** @return the version string of this tool */
    @Override
    public String getToolVersion() {
        return "1.4.1";
    }

    /** @return {@code null}; this tool defines no default result infos */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }

    /** @return {@code null}; this tool provides no test cases */
    @Override
    public ToolResult[] getTestCases(String path) {
        return null;
    }

    /** Does nothing; the tool keeps no state between runs. */
    @Override
    public void clear() {
    }

    /** @return {@code null}; this tool lists no references */
    @Override
    public String[] getReferences() {
        return null;
    }

    /**
     * Collects, for one input sequence ID, the ranks and positions of the matches
     * of every TALE.
     */
    private static class Inter {
        /** ID of the input sequence. */
        final String id;
        /** Ranks of the matches per TALE; an entry is {@code null} while that TALE has no match. */
        final IntList[] ranks;
        /** Positions of the matches per TALE, parallel to {@link #ranks}. */
        final IntList[] pos;

        /**
         * @param id  the sequence ID
         * @param len the number of TALEs
         */
        public Inter(String id, int len) {
            this.id = id;
            this.ranks = new IntList[len];
            this.pos = new IntList[len];
        }

        /**
         * Records one match.
         *
         * @param tal      index of the TALE
         * @param rank     rank of the match within the predictions of that TALE
         * @param position position of the match
         */
        public void add(int tal, int rank, int position) {
            if (this.ranks[tal] == null) {
                this.ranks[tal] = new IntList();
            }
            if (this.pos[tal] == null) {
                this.pos[tal] = new IntList();
            }
            this.ranks[tal].add(rank);
            this.pos[tal].add(position);
        }

        /** @return the number of TALEs this object was created for */
        public int getLength() {
            return this.ranks.length;
        }

        /** @return the number of TALEs with at least one recorded match */
        public int getNum() {
            int num = 0;
            for (IntList talRanks : this.ranks) {
                if (talRanks != null) {
                    ++num;
                }
            }
            return num;
        }

        /**
         * Computes the geometric mean, over all TALEs with a match, of the
         * geometric mean of that TALE's ranks.
         *
         * <p>The computation is done on logarithms: multiplying the raw ranks
         * overflows {@code double} once a TALE has a few hundred matches in the
         * same sequence, which would make all such entries compare as equal.
         *
         * @return the rank product; {@code NaN} if no TALE has a match
         */
        public double getRankProduct() {
            double logSum = 0.0;
            int n = 0;
            for (IntList talRanks : this.ranks) {
                if (talRanks == null) {
                    continue;
                }
                double talLogSum = 0.0;
                for (int j = 0; j < talRanks.length(); ++j) {
                    talLogSum += Math.log(talRanks.get(j));
                }
                logSum += talLogSum / talRanks.length();
                ++n;
            }
            return Math.exp(logSum / n);
        }

        /**
         * Formats the matches of one TALE as {@code (rank,position)} pairs
         * separated by {@code "; "}.
         *
         * @param j index of the TALE
         * @return the formatted matches, or an empty string if the TALE has none
         */
        public String getString(int j) {
            if (this.ranks[j] == null) {
                return "";
            }
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < this.ranks[j].length(); ++i) {
                if (i > 0) {
                    sb.append("; ");
                }
                sb.append("(" + this.ranks[j].get(i) + "," + this.pos[j].get(i) + ")");
            }
            return sb.toString();
        }
    }
}

