/*
 * Decompiled with CFR 0.152.
 */
package projects.xanthogenomes;

import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.sequenceScores.statisticalModels.differentiable.homogeneous.HomogeneousMMDiffSM;
import de.jstacs.sequenceScores.statisticalModels.trainable.hmm.AbstractHMM;
import de.jstacs.sequenceScores.statisticalModels.trainable.hmm.HMMFactory;
import de.jstacs.utils.Pair;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.HashSet;
import projects.xanthogenomes.tools.TALEPredictionTool;

/**
 * Command-line tool that splits a FASTQ file into reads with and without a
 * match to the repeat profile HMM bundled as
 * {@code projects/xanthogenomes/data/repeats.hmm}.
 *
 * <p>For an input file {@code X} (first program argument) two files are written:
 * {@code X_norepeats.fastq} receives every record for which neither the read
 * nor its reverse complement is reported by
 * {@link #findRepeats(Sequence, AbstractHMM, HomogeneousMMDiffSM, HashSet, int, int)},
 * and {@code X_repeats.fastq} receives all other records.
 */
public class FilterPacBioReads {

    /** Classpath location of the HMMer profile describing the repeat. */
    private static final String REPEAT_HMM_RESOURCE = "projects/xanthogenomes/data/repeats.hmm";

    /** Length of the consensus fragments used for the cheap pre-filter. */
    private static final int FRAGMENT_LENGTH = 10;

    /** The input is consumed in records of four lines: header, sequence, separator, quality. */
    private static final int LINES_PER_RECORD = 4;

    /**
     * Reads the FASTQ file named by {@code args[0]} and distributes its records
     * over the {@code _norepeats.fastq} and {@code _repeats.fastq} output files.
     *
     * <p>The third line of every record is not copied; a plain {@code "+"} is
     * written instead. A trailing, incomplete record (fewer than four lines) is
     * not written to either output.
     *
     * @param args {@code args[0]} is the path of the input FASTQ file
     * @throws IllegalArgumentException if no input file is given
     * @throws IllegalStateException if the bundled HMM resource cannot be found
     * @throws Exception on I/O errors or if parsing/scoring fails
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("Usage: FilterPacBioReads <reads.fastq>");
        }
        String input = args[0];

        java.io.InputStream hmmStream = TALEPredictionTool.class.getClassLoader().getResourceAsStream(REPEAT_HMM_RESOURCE);
        if (hmmStream == null) {
            throw new IllegalStateException("Resource not found on classpath: " + REPEAT_HMM_RESOURCE);
        }

        // The parser fills 'consensus' as a side effect while reading the profile.
        StringBuffer consensus = new StringBuffer();
        Pair<AbstractHMM, HomogeneousMMDiffSM> repeats;
        try (InputStreamReader repeatHMMer = new InputStreamReader(hmmStream)) {
            repeats = HMMFactory.parseProfileHMMFromHMMer(repeatHMMer, consensus, null, null);
        }
        AbstractHMM hmm = repeats.getFirstElement();
        HomogeneousMMDiffSM background = repeats.getSecondElement();
        int consensusLength = consensus.length();

        // Cut the consensus into consecutive, non-overlapping fragments; a
        // trailing remainder shorter than FRAGMENT_LENGTH is dropped.
        int frag = FRAGMENT_LENGTH;
        HashSet<String> parts = new HashSet<String>();
        for (int i = 0; i < consensusLength / frag; i++) {
            parts.add(consensus.substring(i * frag, (i + 1) * frag).toUpperCase());
        }

        // try-with-resources closes all three streams even if scoring a read fails.
        try (BufferedReader read = new BufferedReader(new FileReader(input));
                PrintWriter no = new PrintWriter(input + "_norepeats.fastq");
                PrintWriter re = new PrintWriter(input + "_repeats.fastq")) {
            String head = null;
            String seq = null;
            boolean use = false;
            int lineInRecord = 0;
            String line;
            while ((line = read.readLine()) != null) {
                if (lineInRecord == 0) {
                    head = line;
                } else if (lineInRecord == 1) {
                    seq = line;
                    Sequence forward = Sequence.create(DNAAlphabetContainer.SINGLETON, seq);
                    // The reverse strand only needs scanning when the forward strand has no hit.
                    boolean hasRepeat = FilterPacBioReads.findRepeats(forward, hmm, background, parts, frag, consensusLength)
                            || FilterPacBioReads.findRepeats(forward.reverseComplement(), hmm, background, parts, frag, consensusLength);
                    use = !hasRepeat;
                } else if (lineInRecord == 3) {
                    PrintWriter target = use ? no : re;
                    target.println(head);
                    target.println(seq);
                    target.println("+");
                    target.println(line);
                }
                // lineInRecord == 2 is the separator line, which is skipped on purpose.
                lineInRecord = (lineInRecord + 1) % LINES_PER_RECORD;
            }
            // PrintWriter swallows I/O errors; surface them instead of leaving truncated output.
            if (no.checkError() || re.checkError()) {
                throw new java.io.IOException("Error while writing output files for " + input);
            }
        }
    }

    /**
     * Slides a window over {@code seq} and reports whether any window scores
     * above the threshold under {@code hmm} relative to {@code hom}.
     *
     * <p>The window is {@code round(1.1 * consensusLength)} positions wide. A
     * window is only scored when the running count of consensus fragments in it
     * exceeds one third of {@code parts.size()}; its score is
     * {@code hmm.getLogProbFor(window) - hom.getLogProbFor(window)} and it counts
     * as a hit when that score exceeds {@code consensusLength * ln(1.2)}.
     *
     * @param seq the sequence to scan
     * @param hmm the model scored as foreground
     * @param hom the model scored as background
     * @param parts upper-case consensus fragments, each expected to be {@code frag} long
     * @param frag the fragment length used when updating the fragment count
     * @param consensusLength the length of the consensus the fragments were cut from
     * @return {@code true} if at least one window exceeds the threshold;
     *         {@code false} otherwise, including when {@code seq} is shorter than the window
     * @throws Exception if extracting a sub-sequence or scoring it fails
     */
    public static boolean findRepeats(Sequence seq, AbstractHMM hmm, HomogeneousMMDiffSM hom, HashSet<String> parts, int frag, int consensusLength) throws Exception {
        int w = (int)Math.round((double)consensusLength * 1.1);
        double t = (double)consensusLength * Math.log(1.2);
        int minFragments = parts.size() / 3;
        int lastStart = seq.getLength() - w;

        // -1 means "count the fragments of the current window from scratch".
        // NOTE(review): the full count below counts each fragment at most once,
        // whereas the sliding update counts occurrences, so the running value can
        // drift and may reach -1 again, which triggers a recount. Kept as is.
        int num = -1;
        for (int j = 0; j <= lastStart; j++) {
            Sequence sub = seq.getSubSequence(j, w);
            if (num == -1) {
                String window = sub.toString();
                num = 0;
                for (String part : parts) {
                    if (window.contains(part)) {
                        ++num;
                    }
                }
            }
            if (num > minFragments) {
                double rat = hmm.getLogProbFor(sub) - hom.getLogProbFor(sub);
                // One window above the threshold decides the result; no need to scan further.
                if (rat > t) {
                    return true;
                }
            }
            if (j < lastStart) {
                // Update the count for the next window: the fragment starting at j
                // leaves, the fragment ending at position j + w enters.
                String leaving = seq.toString(j, j + frag);
                String entering = seq.toString(j + w - frag + 1, j + w + 1);
                if (parts.contains(leaving)) {
                    --num;
                }
                if (parts.contains(entering)) {
                    ++num;
                }
            }
        }
        return false;
    }
}

