/*
 * Decompiled with CFR 0.152.
 */
package projects.xanthogenomes;

import de.jstacs.data.DNADataSet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.sequenceScores.statisticalModels.differentiable.homogeneous.HomogeneousMMDiffSM;
import de.jstacs.sequenceScores.statisticalModels.trainable.hmm.AbstractHMM;
import de.jstacs.sequenceScores.statisticalModels.trainable.hmm.HMMFactory;
import de.jstacs.utils.ComparableElement;
import de.jstacs.utils.DoubleList;
import de.jstacs.utils.IntList;
import de.jstacs.utils.Pair;
import de.jstacs.utils.ToolBox;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import projects.xanthogenomes.tools.TALEPredictionTool;

/**
 * Command-line scanner that looks for stretches of repeat-profile hits in DNA sequences,
 * refines their boundaries with a "start" and an "end" profile HMM, and writes every
 * sequence whose hits stay clear of both sequence ends to a FASTA file.
 *
 * <p>All three profile HMMs are loaded from class-path resources below
 * {@code projects/xanthogenomes/data/}.
 */
public class NHMMerLoose {
    /**
     * Scans every sequence of the FASTA file {@code args[0]} and copies those sequences
     * whose detected hit span lies more than 1000 positions away from both sequence ends
     * to {@code args[0] + "_stretch.fasta"}. For each copied sequence a line
     * {@code index first last length} is printed to standard output.
     *
     * @param args {@code args[0]} is the path of the input FASTA file
     * @throws IllegalArgumentException if no input file was given
     * @throws Exception if loading the data or the profiles, or scoring, fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: NHMMerLoose <sequences.fasta>");
        }
        SimpleSequenceAnnotationParser parser = new SimpleSequenceAnnotationParser();
        DNADataSet ds = new DNADataSet(args[0], '>', parser);

        StringBuffer repeatConsensus = new StringBuffer();
        Pair<AbstractHMM, HomogeneousMMDiffSM> repeats = loadProfile("projects/xanthogenomes/data/repeats.hmm", repeatConsensus, null, null);

        StringBuffer startConsensus = new StringBuffer();
        LinkedList<Integer> startMatchStates = new LinkedList<Integer>();
        LinkedList<Integer> startSilentStates = new LinkedList<Integer>();
        Pair<AbstractHMM, HomogeneousMMDiffSM> start = loadProfile("projects/xanthogenomes/data/starts.hmm", startConsensus, startMatchStates, startSilentStates);

        StringBuffer endConsensus = new StringBuffer();
        LinkedList<Integer> endMatchStates = new LinkedList<Integer>();
        LinkedList<Integer> endSilentStates = new LinkedList<Integer>();
        Pair<AbstractHMM, HomogeneousMMDiffSM> end = loadProfile("projects/xanthogenomes/data/ends.hmm", endConsensus, endMatchStates, endSilentStates);

        // try-with-resources: the output file is closed even if scanning a sequence throws.
        try (PrintWriter wr = new PrintWriter(args[0] + "_stretch.fasta")) {
            for (int i = 0; i < ds.getNumberOfElements(); i++) {
                Sequence seq = ds.getElementAt(i);
                int[][] res = NHMMerLoose.run(repeats, start, end, repeatConsensus, startConsensus, endConsensus, startMatchStates, startSilentStates, endMatchStates, endSilentStates, seq);
                if (res.length == 0) {
                    continue;
                }
                // Smallest start and largest end over all hits, in forward-strand coordinates.
                int first = seq.getLength();
                int last = 0;
                for (int[] hit : res) {
                    int lo;
                    int hi;
                    if (hit[2] > 0) {
                        lo = hit[0];
                        hi = hit[1];
                    } else {
                        // Reverse-strand hits are mirrored back onto the forward strand.
                        lo = seq.getLength() - hit[1];
                        hi = seq.getLength() - hit[0];
                    }
                    first = Math.min(first, lo);
                    last = Math.max(last, hi);
                }
                if (first > 1000 && seq.getLength() - last > 1000) {
                    System.out.println(i + " " + first + " " + last + " " + seq.getLength());
                    wr.println(parser.parseAnnotationToComment('>', seq.getAnnotation()));
                    wr.println(seq.toString());
                    wr.flush();
                }
            }
        }
    }

    /**
     * Parses one HMMer profile from a class-path resource and closes the underlying stream.
     *
     * @param resource     class-path location of the profile file
     * @param consensus    buffer handed to the parser for the profile consensus
     * @param matchStates  list handed to the parser for match-state indices, may be {@code null}
     * @param silentStates list handed to the parser for silent-state indices, may be {@code null}
     * @return the parsed profile HMM together with its homogeneous background model
     * @throws IllegalStateException if the resource is not on the class path
     * @throws Exception if parsing fails
     */
    private static Pair<AbstractHMM, HomogeneousMMDiffSM> loadProfile(String resource, StringBuffer consensus, LinkedList<Integer> matchStates, LinkedList<Integer> silentStates) throws Exception {
        java.io.InputStream in = TALEPredictionTool.class.getClassLoader().getResourceAsStream(resource);
        if (in == null) {
            throw new IllegalStateException("Missing class-path resource: " + resource);
        }
        try (InputStreamReader reader = new InputStreamReader(in)) {
            return HMMFactory.parseProfileHMMFromHMMer(reader, consensus, matchStates, silentStates);
        }
    }

    /**
     * Finds repeat regions on both strands of {@code seq} and refines their boundaries
     * with the start and end profiles.
     *
     * @param repeats           repeat profile HMM and its background model
     * @param start             start profile HMM and its background model
     * @param end               end profile HMM and its background model
     * @param repeatConsensus   consensus of the repeat profile
     * @param startConsensus    consensus of the start profile
     * @param endConsensus      consensus of the end profile
     * @param startMatchStates  match-state indices of the start profile
     * @param startSilentStates silent-state indices of the start profile
     * @param endMatchStates    match-state indices of the end profile
     * @param endSilentStates   silent-state indices of the end profile
     * @param seq               the sequence to scan
     * @return one {@code {start, end, strand}} triple per region; {@code strand} is
     *         {@code 1} for hits on {@code seq} and {@code -1} for hits on its reverse
     *         complement (whose coordinates refer to the reverse complement)
     * @throws Exception if scoring fails
     */
    public static int[][] run(Pair<AbstractHMM, HomogeneousMMDiffSM> repeats, Pair<AbstractHMM, HomogeneousMMDiffSM> start, Pair<AbstractHMM, HomogeneousMMDiffSM> end, StringBuffer repeatConsensus, StringBuffer startConsensus, StringBuffer endConsensus, LinkedList<Integer> startMatchStates, LinkedList<Integer> startSilentStates, LinkedList<Integer> endMatchStates, LinkedList<Integer> endSilentStates, Sequence seq) throws Exception {
        LinkedList<int[]> fwd = NHMMerLoose.findRepeats(seq, repeats.getFirstElement(), repeats.getSecondElement(), repeatConsensus.toString());
        LinkedList<int[]> rev = NHMMerLoose.findRepeats(seq.reverseComplement(), repeats.getFirstElement(), repeats.getSecondElement(), repeatConsensus.toString());

        LinkedList<int[]> list = new LinkedList<int[]>();
        refineHits(fwd, true, seq, start, end, startConsensus.toString(), endConsensus.toString(), startMatchStates, startSilentStates, endMatchStates, endSilentStates, list);
        refineHits(rev, false, seq, start, end, startConsensus.toString(), endConsensus.toString(), startMatchStates, startSilentStates, endMatchStates, endSilentStates, list);

        // Drop a region that is completely contained in its same-strand neighbour.
        // Only consecutive list entries are compared.
        LinkedList<int[]> toRemove = new LinkedList<int[]>();
        int[] prev = null;
        for (int[] cur : list) {
            if (prev != null && prev[2] == cur[2]) {
                if (prev[0] >= cur[0] && prev[1] <= cur[1]) {
                    toRemove.add(prev);
                } else if (prev[0] <= cur[0] && prev[1] >= cur[1]) {
                    toRemove.add(cur);
                }
            }
            prev = cur;
        }
        // int[] uses identity equality, so exactly the collected array instances are removed.
        list.removeAll(toRemove);
        return list.toArray(new int[0][]);
    }

    /**
     * Refines every raw repeat region of one strand with the start and end profiles and
     * appends the resulting {@code {start, end, strand}} triple to {@code out}. A boundary
     * keeps its raw value when {@link #getBestTerminus} returns {@code null} for it.
     */
    private static void refineHits(LinkedList<int[]> hits, boolean forward, Sequence seq, Pair<AbstractHMM, HomogeneousMMDiffSM> start, Pair<AbstractHMM, HomogeneousMMDiffSM> end, String startConsensus, String endConsensus, LinkedList<Integer> startMatchStates, LinkedList<Integer> startSilentStates, LinkedList<Integer> endMatchStates, LinkedList<Integer> endSilentStates, LinkedList<int[]> out) throws Exception {
        int strand = forward ? 1 : -1;
        for (int[] hit : hits) {
            int regionStart = hit[1];
            int regionEnd = hit[2];
            int[] nterm = NHMMerLoose.getBestTerminus(seq, hit[1], hit[2], forward, true, start.getFirstElement(), start.getSecondElement(), startConsensus, startMatchStates, startSilentStates);
            if (nterm != null) {
                regionStart = nterm[0];
            }
            int[] cterm = NHMMerLoose.getBestTerminus(seq, hit[1], hit[2], forward, false, end.getFirstElement(), end.getSecondElement(), endConsensus, endMatchStates, endSilentStates);
            if (cterm != null) {
                regionEnd = cterm[1];
            }
            out.add(new int[]{regionStart, regionEnd, strand});
        }
    }

    /**
     * Searches next to a repeat region for the best-scoring window of a terminus profile
     * and trims that window using the Viterbi path of the profile.
     *
     * <p>Windows have a width of 1.1 times the consensus length and are moved in steps of
     * 5 positions: leftwards from the region start if {@code isStart}, rightwards from the
     * region end otherwise, by at most about 200 positions. The walk stops early once the
     * log-odds score (profile minus background) has dropped below the previously recorded
     * score more than 10 times in a row.
     *
     * @param seq          the sequence; its reverse complement is used when {@code fwd} is {@code false}
     * @param start        start of the repeat region (in the coordinates of the scanned strand)
     * @param end          end of the repeat region (in the coordinates of the scanned strand)
     * @param fwd          {@code true} to scan {@code seq}, {@code false} to scan its reverse complement
     * @param isStart      {@code true} to search before {@code start}, {@code false} to search after {@code end}
     * @param hmm          terminus profile HMM
     * @param hom          background model
     * @param consensus    consensus of the terminus profile
     * @param matchStates  indices of the match states of {@code hmm}
     * @param silentStates indices of the silent states of {@code hmm}
     * @return {@code {regionStart, regionEnd}} of the trimmed best window, or {@code null}
     *         if no window could be scored
     * @throws Exception if scoring fails
     */
    public static int[] getBestTerminus(Sequence seq, int start, int end, boolean fwd, boolean isStart, AbstractHMM hmm, HomogeneousMMDiffSM hom, String consensus, LinkedList<Integer> matchStates, LinkedList<Integer> silentStates) throws Exception {
        int w = (int)Math.round((double)consensus.length() * 1.1);
        int slack = (int)Math.round(0.1 * (double)consensus.length());
        if (!fwd) {
            seq = seq.reverseComplement();
        }
        DoubleList scores = new DoubleList();
        IntList positions = new IntList();
        int numLower = 0;
        if (isStart) {
            int lowest = Math.max(0, start - w - 200);
            for (int i = start - w + slack; i >= lowest; i -= 5) {
                double fg = hmm.getLogProbFor(seq, i, i + w - 1);
                double bg = hom.getLogProbFor(seq, i, i + w - 1);
                double rat = fg - bg;
                numLower = scores.length() > 0 && rat < scores.get(scores.length() - 1) ? numLower + 1 : 0;
                if (numLower > 10) {
                    break;
                }
                scores.add(rat);
                positions.add(i);
            }
        } else {
            int limit = Math.min(end + 200, seq.getLength() - w + 1);
            for (int i = end - slack; i < limit; i += 5) {
                double fg = hmm.getLogProbFor(seq, i, i + w - 1);
                double bg = hom.getLogProbFor(seq, i, i + w - 1);
                double rat = fg - bg;
                numLower = scores.length() > 0 && rat < scores.get(scores.length() - 1) ? numLower + 1 : 0;
                if (numLower > 10) {
                    break;
                }
                scores.add(rat);
                positions.add(i);
            }
        }
        if (positions.length() == 0) {
            return null;
        }
        int bestPos = positions.get(ToolBox.getMaxIndex(scores.toArray()));
        Pair<IntList, Double> vit = hmm.getViterbiPathFor(bestPos, bestPos + w - 1, seq);
        IntList states = vit.getFirstElement();

        // Running balance along the path: +1 for a match state, -1 (floored at 0) otherwise.
        double[] count = new double[states.length()];
        for (int i = 0; i < count.length; i++) {
            double before = i > 0 ? count[i - 1] : 0.0;
            if (matchStates.contains(states.get(i))) {
                count[i] = before + 1.0;
            } else {
                count[i] = before > 0.0 ? before - 1.0 : 0.0;
            }
        }
        // The kept part of the path ends at the maximal balance and starts after the
        // closest preceding position where the balance was zero.
        int endIdx = ToolBox.getMaxIndex(count);
        int startIdx = endIdx;
        while (startIdx >= 0 && count[startIdx] > 0.0) {
            --startIdx;
        }
        // Non-silent states cut off on either side give the number of positions to trim.
        int offStart = 0;
        for (int i = 0; i < startIdx; i++) {
            if (!silentStates.contains(states.get(i))) {
                ++offStart;
            }
        }
        int offEnd = 0;
        for (int i = states.length() - 1; i > endIdx; i--) {
            if (!silentStates.contains(states.get(i))) {
                ++offEnd;
            }
        }
        return new int[]{bestPos + offStart, bestPos + w - offEnd};
    }

    /**
     * Scans {@code seq} with a sliding window of 1.1 times the consensus length, picks
     * non-overlapping windows whose log-odds score (profile minus background) exceeds
     * {@code consensus.length() * ln(1.1)}, and groups picked windows into regions.
     *
     * <p>To avoid scoring every window, the consensus is cut into 10-mers and a window is
     * only scored while the running count of consensus 10-mers seen in it is larger than a
     * quarter of the number of distinct 10-mers.
     *
     * @param seq       the sequence to scan
     * @param hmm       repeat profile HMM
     * @param hom       background model
     * @param consensus consensus of the repeat profile
     * @return regions as {@code {0, start, end}}; empty if the sequence is shorter than
     *         the scan window or no window exceeds the threshold
     * @throws Exception if scoring fails
     */
    public static LinkedList<int[]> findRepeats(Sequence seq, AbstractHMM hmm, HomogeneousMMDiffSM hom, String consensus) throws Exception {
        LinkedList<int[]> found = new LinkedList<int[]>();
        int w = (int)Math.round((double)consensus.length() * 1.1);
        int numWindows = seq.getLength() - w + 1;
        if (numWindows <= 0) {
            // Sequence shorter than one window: nothing to scan (and no negative array size).
            return found;
        }
        int frag = 10;
        HashSet<String> parts = new HashSet<String>();
        for (int i = 0; i < consensus.length() / frag; i++) {
            parts.add(consensus.substring(i * frag, (i + 1) * frag).toUpperCase());
        }
        double t = (double)consensus.length() * Math.log(1.1);

        double[] vals = new double[numWindows];
        int num = -1;
        for (int pos = 0; pos < numWindows; pos++) {
            Sequence sub = seq.getSubSequence(pos, w);
            if (num == -1) {
                // First window: count the consensus fragments it contains from scratch.
                num = 0;
                String substr = sub.toString();
                for (String part : parts) {
                    if (substr.indexOf(part) > -1) {
                        ++num;
                    }
                }
            }
            if (num > parts.size() / 4) {
                double fg = hmm.getLogProbFor(sub);
                double bg = hom.getLogProbFor(sub);
                vals[pos] = fg - bg;
            }
            if (pos < numWindows - 1) {
                // Update the running count for the next window from the 10-mer at the
                // current window start and the 10-mer ending at the next window's end.
                String leaving = seq.toString(pos, pos + frag);
                String entering = seq.toString(pos + w - frag + 1, pos + w + 1);
                if (parts.contains(leaving)) {
                    --num;
                }
                if (parts.contains(entering)) {
                    ++num;
                }
            }
        }

        // Repeatedly take the best remaining window and blank out its neighbourhood.
        LinkedList<ComparableElement<Double, Integer>> list = new LinkedList<ComparableElement<Double, Integer>>();
        while (true) {
            int maxIdx = ToolBox.getMaxIndex(vals);
            double max = vals[maxIdx];
            for (int j = Math.max(0, maxIdx - w / 2); j < maxIdx + w / 2 && j < vals.length; j++) {
                vals[j] = 0.0;
            }
            if (!(max > t)) {
                break;
            }
            list.add(new ComparableElement<Double, Integer>(max, maxIdx));
        }

        // Sorted via ComparableElement's natural order; the window position is stored as
        // the weight. Presumably this orders the hits by position - confirm against Jstacs.
        @SuppressWarnings("unchecked")
        ComparableElement<Double, Integer>[] els = list.toArray(new ComparableElement[0]);
        Arrays.sort(els);

        if (els.length == 1) {
            int only = els[0].getWeight();
            found.add(new int[]{0, only, only + consensus.length()});
        } else if (els.length > 1) {
            int start = els[0].getWeight();
            for (int j = 1; j < els.length; j++) {
                boolean gap = els[j].getWeight() - 1000 > els[j - 1].getWeight();
                // NOTE(review): a region is closed at hit j - 1 both on a gap and at the
                // last hit, so the last hit itself never ends up inside a region. Behaviour
                // kept as found; confirm whether that is intended.
                if (gap || j == els.length - 1) {
                    found.add(new int[]{0, start, els[j - 1].getWeight() + consensus.length()});
                    start = els[j].getWeight();
                }
            }
        }
        return found;
    }
}

