/*
 * Decompiled with CFR 0.152.
 */
package projects.talGA;

import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.SimpleDiscreteSequence;
import de.jstacs.data.sequences.SparseSequence;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotation;
import de.jstacs.io.FileManager;
import de.jstacs.io.XMLParser;
import de.jstacs.optimization.geneticAlgorithms.GeneticAlgorithm2;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.CachingFitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.CombinedFitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.FitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.MultiThreadedFitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.initialization.DiscreteSequencePopulationInitializer;
import de.jstacs.optimization.geneticAlgorithms.initialization.FixedPopulationInitializer;
import de.jstacs.optimization.geneticAlgorithms.operations.Operation;
import de.jstacs.optimization.geneticAlgorithms.operations.SimpleCrossover;
import de.jstacs.optimization.geneticAlgorithms.operations.SimpleInDel;
import de.jstacs.optimization.geneticAlgorithms.operations.SimpleShift;
import de.jstacs.optimization.geneticAlgorithms.operations.SymbolDependentMutation;
import de.jstacs.optimization.geneticAlgorithms.populations.DiscreteSequencePopulation;
import de.jstacs.optimization.geneticAlgorithms.populations.Population;
import de.jstacs.optimization.geneticAlgorithms.populations.individuals.DiscreteSequenceIndividual;
import de.jstacs.optimization.geneticAlgorithms.selection.RandomByFitnessSelection;
import de.jstacs.results.Result;
import de.jstacs.utils.Normalisation;
import de.jstacs.utils.SafeOutputStream;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import projects.talGA.GlobalTALFitnessFunction;
import projects.talGA.MatchFinder;
import projects.talGA.OffTargetFitnessFunction;
import projects.talGA.RVDDistanceFitnessFunction;
import projects.talGA.RVDFitnessFunction;
import projects.talGA.RVDMatchFitnessFunction;
import projects.tals.LimitedSortedList;
import projects.tals.ScanForTBSWeb;
import projects.tals.TALgetterDiffSM;

public class TALDesigner {
    /**
     * Command-line entry point of the TAL designer.
     *
     * <p>Expected arguments:
     * <ol start="0">
     * <li>path of the file with the positive (target) sequences</li>
     * <li>path of the file with negative sequences, or the literal string {@code null} to skip them</li>
     * <li>path of the file with background sequences</li>
     * <li>number of independent genetic-algorithm repetitions</li>
     * <li>the {@code length} value handed to the population initializers, match finders and fitness functions</li>
     * <li>number of threads for the multi-threaded fitness evaluation</li>
     * </ol>
     *
     * <p>The method loads the TAL model from the class path resource
     * {@code projects/tals/talfinder_obg2_hyp_bg.xml}, builds a combined fitness function,
     * runs {@code repetitions} genetic-algorithm runs followed by one final run that is seeded
     * with the best individuals of all repetitions, and prints up to ten results whose best
     * target positions differ from each other.
     *
     * @param args the six command-line arguments described above
     * @throws IllegalArgumentException if fewer than six arguments are supplied
     * @throws Exception if loading the model, reading the input files or the optimization fails
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint; otherwise the missing argument is only noticed
        // (as ArrayIndexOutOfBoundsException) after the model has been loaded and printed.
        if (args == null || args.length < 6) {
            throw new IllegalArgumentException("Usage: TALDesigner <positives> <negatives|null> <background> <repetitions> <length> <numThreads> (got " + (args == null ? 0 : args.length) + " arguments)");
        }

        // --- model and run parameters ---
        TALgetterDiffSM talFunction = (TALgetterDiffSM)XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSWeb.class.getClassLoader().getResourceAsStream("projects/tals/talfinder_obg2_hyp_bg.xml")), "model");
        System.out.println(talFunction);
        talFunction.fix();
        DiscreteAlphabet rvdAlph = (DiscreteAlphabet)talFunction.getRVDAlphabet().getAlphabetAt(0);
        int repetitions = Integer.parseInt(args[3]);
        int length = Integer.parseInt(args[4]);
        int numThreads = Integer.parseInt(args[5]);
        boolean thresholdByBestScore = true;
        // RVD codes treated as "strong" and the RVD codes the designer may use at all.
        int[] strong = new int[]{rvdAlph.getCode("NN"), rvdAlph.getCode("NH"), rvdAlph.getCode("HD"), rvdAlph.getCode("HN")};
        int[] allowed = new int[]{rvdAlph.getCode("NI"), rvdAlph.getCode("HD"), rvdAlph.getCode("NG"), rvdAlph.getCode("NN"), rvdAlph.getCode("NH"), rvdAlph.getCode("NK")};
        int initPopSize = 100;
        int finalPopSize = 100;
        int top = 10;

        // --- input data; negatives and background are filtered against the positives ---
        DataSet positives = TALDesigner.readFile(args[0], null)[0];
        DataSet negatives = TALDesigner.readFile(args[1], positives)[0];
        DataSet background = TALDesigner.readFile(args[2], positives)[0];
        MatchFinder negFinder = negatives == null ? null : MatchFinder.getMatchFinder(negatives, talFunction, length);
        MatchFinder backFinder = MatchFinder.getMatchFinder(background, talFunction, length);

        // --- fitness functions, all combined with the same factor ---
        LinkedList<FitnessFunction<DiscreteSequenceIndividual>> fitnesses = new LinkedList<FitnessFunction<DiscreteSequenceIndividual>>();
        fitnesses.add(new GlobalTALFitnessFunction(talFunction, positives, backFinder, GlobalTALFitnessFunction.Objective.MAXMARGIN, length, thresholdByBestScore));
        fitnesses.add(new GlobalTALFitnessFunction(talFunction, positives, backFinder, GlobalTALFitnessFunction.Objective.CL, length, thresholdByBestScore));
        fitnesses.add(new RVDFitnessFunction(strong, 3));
        fitnesses.add(new RVDDistanceFitnessFunction(strong, 5));
        fitnesses.add(new RVDMatchFitnessFunction(positives, talFunction));
        fitnesses.add(new OffTargetFitnessFunction(talFunction, positives, backFinder, thresholdByBestScore));
        if (negatives != null) {
            fitnesses.add(new GlobalTALFitnessFunction(talFunction, positives, negFinder, GlobalTALFitnessFunction.Objective.MAXMARGIN, length, thresholdByBestScore));
            fitnesses.add(new GlobalTALFitnessFunction(talFunction, positives, negFinder, GlobalTALFitnessFunction.Objective.CL, length, thresholdByBestScore));
            fitnesses.add(new OffTargetFitnessFunction(talFunction, positives, negFinder, thresholdByBestScore));
        }
        double fac = 0.05;
        double[] facs = new double[fitnesses.size()];
        Arrays.fill(facs, fac);
        FitnessFunction<Object> combined = new CombinedFitnessFunction(fitnesses.toArray(new FitnessFunction[0]), facs, true);
        combined = new MultiThreadedFitnessFunction(combined, numThreads);

        // --- initial populations ---
        System.out.println("Simple Population");
        Population<DiscreteSequenceIndividual> simplePop = TALDesigner.getSimplePopulation(initPopSize, positives, length, talFunction, combined, allowed);
        System.out.println(simplePop);
        DiscreteSequencePopulationInitializer initializer = new DiscreteSequencePopulationInitializer(length, talFunction.getRVDAlphabet(), allowed);
        FixedPopulationInitializer<DiscreteSequenceIndividual> simpleInit = new FixedPopulationInitializer<DiscreteSequenceIndividual>(simplePop);

        // --- mutation weights: condProbs[from][to], only "allowed" targets get weight ---
        double[][] condProbs = new double[(int)rvdAlph.length()][(int)rvdAlph.length()];
        boolean[] isStrong = new boolean[condProbs.length];
        for (int i = 0; i < isStrong.length; i++) {
            for (int j = 0; j < strong.length; j++) {
                if (strong[j] == i) {
                    isStrong[i] = true;
                    break;
                }
            }
        }
        for (int i = 0; i < condProbs.length; i++) {
            for (int j = 0; j < allowed.length; j++) {
                int target = allowed[j];
                double weight;
                if (i == target) {
                    // never "mutate" a symbol into itself
                    weight = 0.0;
                } else if (isStrong[i] == isStrong[target]) {
                    // staying within the same class (strong / not strong) gets twice the weight
                    weight = 2.0;
                } else {
                    weight = 1.0;
                }
                condProbs[i][target] = weight;
            }
            Normalisation.sumNormalisation(condProbs[i]);
        }

        // --- genetic operators ---
        SymbolDependentMutation mutation = new SymbolDependentMutation(true, 0.05, condProbs);
        SimpleCrossover crossover = new SimpleCrossover(0.8);
        SimpleShift shift = new SimpleShift(true, 0.5);
        SimpleInDel indel = new SimpleInDel(0.05);
        RandomByFitnessSelection selection = new RandomByFitnessSelection(true);

        // --- independent repetitions; the first one starts from the simple population ---
        GeneticAlgorithm2<DiscreteSequenceIndividual> ga;
        Population[] pops = new Population[repetitions];
        for (int r = 0; r < repetitions; r++) {
            System.out.println("repetition " + r);
            ga = new GeneticAlgorithm2<DiscreteSequenceIndividual>(r == 0 ? simpleInit : initializer, new Operation[]{mutation, crossover, shift, indel}, selection, combined);
            ga.optimize(1000, initPopSize);
            // every repetition contributes an equal share to the seed of the final run
            pops[r] = ga.getFinalPopulation().removeDuplicates().getBestIndividuals((int)Math.ceil((double)finalPopSize / (double)repetitions));
            backFinder.reset();
            if (negFinder != null) {
                negFinder.reset();
            }
            ((CachingFitnessFunction)combined).reset();
        }

        // --- final run seeded with the joined results (no indel operation here) ---
        System.out.println("final");
        Population init = new DiscreteSequencePopulation(new DiscreteSequenceIndividual[0]).join(pops);
        ga = new GeneticAlgorithm2<DiscreteSequenceIndividual>(new FixedPopulationInitializer(init), new Operation[]{mutation, crossover, shift}, selection, combined);
        ga.optimize(100, finalPopSize);
        Population<DiscreteSequenceIndividual> pop = ga.getFinalPopulation();
        pop = pop.removeDuplicates();
        pop = pop.join(init, simplePop);
        pop = pop.removeDuplicates();
        pop.sortIndividuals();
        ((MultiThreadedFitnessFunction)combined).stopWorkers();
        combined = new CachingFitnessFunction<DiscreteSequenceIndividual>(combined);
        combined.setOutputStream(SafeOutputStream.getSafeOutputStream(System.out));

        // --- report ---
        System.out.println("************************************************");
        System.out.println("Result:");
        System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++");
        ArrayList<int[]> list = new ArrayList<int[]>();
        int reported = 0;
        for (int idx = 0; idx < pop.getNumberOfIndivuals() && reported < top; idx++) {
            int[] curr = TALDesigner.getBestTargetPosition(pop.getIndividual(idx), talFunction, positives);

            // Only report individuals whose best target positions were not reported before.
            boolean isnew = true;
            for (int[] prev : list) {
                if (Arrays.equals(curr, prev)) {
                    isnew = false;
                    break;
                }
            }
            if (!isnew) {
                continue;
            }

            list.add(curr);
            SimpleDiscreteSequence tal = pop.getIndividual(idx).getSequence();
            System.out.println(reported + " " + pop.getIndividual(idx));
            combined.getFitness(pop.getIndividual(idx));

            // Print the best target site per positive sequence and remember the worst of these scores.
            double worstScore = Double.POSITIVE_INFINITY;
            for (int k = 0; k < curr.length; k++) {
                Sequence bestSeq = positives.getElementAt(k).getSubSequence(curr[k], pop.getIndividual(idx).getDimension() + 1);
                double score = talFunction.getPartialLogScoreFor((Sequence)tal, bestSeq, 0, 0, tal.getLength() + 1);
                if (score < worstScore) {
                    worstScore = score;
                }
                bestSeq = bestSeq.annotate(true, new ReferenceSequenceAnnotation("seq", tal, new Result[0]));
                System.out.println(k + " " + curr[k] + " " + bestSeq + " " + talFunction.getMatchString(bestSeq));
            }

            // Off-target threshold: worst on-target score minus log(2) per scored position.
            System.out.println("\nOff targets:");
            LimitedSortedList<MatchFinder.Match> scs = backFinder.getScoresAbove(tal, worstScore - (double)(tal.getLength() + 1) * Math.log(2.0), 10000, true, false);
            if (scs.getLength() > 0) {
                double sc = scs.getBestScore();
                MatchFinder.Match m = scs.getBestElement();
                Sequence temp = background.getElementAt(m.getSeqIdx()).getSubSequence(m.getSeqPos(), tal.getLength() + 1).annotate(true, new ReferenceSequenceAnnotation("seq", tal, new Result[0]));
                System.out.println(sc + " " + temp + " " + talFunction.getMatchString(temp));
            } else {
                System.out.println("No off-targets above threshold.");
            }
            System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
            ++reported;
        }
    }

    /**
     * Derives a starting population directly from the positive sequences.
     *
     * <p>Every window of {@code length + 1} symbols of every positive sequence is passed to
     * {@link TALgetterDiffSM#getBestRVDsFor} (restricted to the {@code allowed} RVD codes); the
     * returned RVD sequence is wrapped in an individual on which {@code fitness} is set.
     * The {@code topN} best of all these individuals are returned.
     *
     * @param topN number of individuals requested from the candidate population
     * @param positives the positive sequences that are scanned window by window
     * @param length window size minus one
     * @param model the model that proposes the RVD sequence for a window
     * @param fitness the fitness function set on every created individual
     * @param allowed the RVD codes handed to the model as permitted symbols
     * @return the result of {@code getBestIndividuals(topN)} on the candidate population
     * @throws Exception if sequence extraction, the model or the population signal an error
     */
    public static Population<DiscreteSequenceIndividual> getSimplePopulation(int topN, DataSet positives, int length, TALgetterDiffSM model, FitnessFunction<DiscreteSequenceIndividual> fitness, int[] allowed) throws Exception {
        ArrayList<DiscreteSequenceIndividual> candidates = new ArrayList<DiscreteSequenceIndividual>();
        for (int seqIdx = 0; seqIdx < positives.getNumberOfElements(); seqIdx++) {
            Sequence seq = positives.getElementAt(seqIdx);
            // start offsets for which a full window of length + 1 symbols fits into seq
            for (int start = 0; start < seq.getLength() - length; start++) {
                Sequence window = seq.getSubSequence(start, length + 1);
                Sequence bestRvds = model.getBestRVDsFor(window, allowed);
                DiscreteSequenceIndividual candidate = new DiscreteSequenceIndividual((SimpleDiscreteSequence)bestRvds);
                candidate.setFitness((FitnessFunction)fitness);
                candidates.add(candidate);
            }
        }
        DiscreteSequenceIndividual[] asArray = candidates.toArray(new DiscreteSequenceIndividual[0]);
        DiscreteSequencePopulation all = new DiscreteSequencePopulation(asArray);
        return all.getBestIndividuals(topN);
    }

    /**
     * Sorts a sequence string into "kept" and "excluded" parts with respect to a set of
     * positive strings.
     *
     * <ul>
     * <li>If {@code str} occurs inside one of the {@code positives}, the complete string is added
     * to {@code outs} and nothing is added to {@code seqs}.</li>
     * <li>Otherwise every occurrence of a positive inside {@code str} is cut out: the positive is
     * added to {@code outs} (once per occurrence) and the remaining pieces before and after it are
     * processed further. Pieces that contain no positive are added to {@code seqs}.</li>
     * </ul>
     *
     * <p>Empty entries of {@code positives} are ignored while cutting, because an empty pattern
     * is found at index 0 of every string and the cutting loop would never terminate.
     *
     * @param con alphabet used to create the {@link SparseSequence}s
     * @param str the sequence string to distribute
     * @param positives the strings that must not remain in {@code seqs}
     * @param seqs receives the pieces that are kept
     * @param outs receives the excluded parts
     * @throws Exception if creating a {@link SparseSequence} fails
     */
    public static void addFilteredfilter(AlphabetContainer con, String str, String[] positives, List<Sequence> seqs, List<Sequence> outs) throws Exception {
        // Case 1: the whole string is part of a positive -> exclude it completely.
        for (String positive : positives) {
            if (positive.indexOf(str) >= 0) {
                outs.add(new SparseSequence(con, str));
                return;
            }
        }

        // Case 2: cut all occurrences of positives out of the string (FIFO work list).
        LinkedList<String> pending = new LinkedList<String>();
        pending.add(str);
        LinkedList<String> kept = new LinkedList<String>();
        while (!pending.isEmpty()) {
            String curr = pending.removeFirst();
            boolean wasSplit = false;
            for (String positive : positives) {
                if (positive.length() == 0) {
                    // "".indexOf-style match at 0 would re-queue curr unchanged forever
                    continue;
                }
                int idx = curr.indexOf(positive);
                if (idx >= 0) {
                    // A positive at the very start or end of curr yields an empty prefix/suffix;
                    // such empty pieces are queued and emitted like any other piece.
                    pending.add(curr.substring(0, idx));
                    pending.add(curr.substring(idx + positive.length()));
                    outs.add(new SparseSequence(con, positive));
                    wasSplit = true;
                    break;
                }
            }
            if (!wasSplit) {
                kept.add(curr);
            }
        }
        for (String piece : kept) {
            seqs.add(new SparseSequence(con, piece));
        }
    }

    /**
     * Reads DNA sequences from a FASTA-like text file and filters them against the positives.
     *
     * <p>Lines starting with {@code >} end the current sequence. All other lines are converted to
     * upper case and scanned for runs of {@code A}, {@code C}, {@code G}, {@code T}; runs are
     * concatenated across line breaks, while any other character ends the current fragment.
     * A non-empty line that contains no such run at all also ends the current fragment, so that
     * sequence parts on both sides of it are not joined. Every finished fragment is passed to
     * {@link #addFilteredfilter}, which distributes it between the kept and the excluded
     * sequences. The total number of characters of all fragments and the number of excluded
     * sequences are printed to {@code System.out}.
     *
     * @param path the file to read, or the literal string {@code "null"} for "no file"
     * @param positives sequences whose string representations are filtered out, or {@code null}
     *        to keep everything
     * @return an array of two elements: the kept sequences and the excluded sequences
     *         ({@code null} if nothing was excluded); both elements are {@code null} if
     *         {@code path} equals {@code "null"}
     * @throws Exception if the file cannot be read or a data set or sequence cannot be created
     */
    public static DataSet[] readFile(String path, DataSet positives) throws Exception {
        if (path.equals("null")) {
            return new DataSet[2];
        }
        String[] posStr = new String[positives == null ? 0 : positives.getNumberOfElements()];
        for (int i = 0; i < posStr.length; i++) {
            posStr[i] = positives.getElementAt(i).toString();
        }
        DNAAlphabetContainer con = DNAAlphabetContainer.SINGLETON;
        Pattern acgt = Pattern.compile("[ACGT]+", Pattern.CASE_INSENSITIVE);
        ArrayList<Sequence> seqs = new ArrayList<Sequence>();
        ArrayList<Sequence> out = new ArrayList<Sequence>();
        long total = 0L;
        // Collects the current ACGT fragment, possibly spanning several input lines.
        StringBuilder line = new StringBuilder();

        BufferedReader read = new BufferedReader(new FileReader(path));
        try {
            String str;
            while ((str = read.readLine()) != null) {
                if (str.startsWith(">")) {
                    // header line: the previous record ends here
                    total += TALDesigner.flushFragment(con, line, posStr, seqs, out);
                    continue;
                }
                str = str.toUpperCase();
                Matcher match = acgt.matcher(str);
                boolean foundRun = false;
                while (match.find()) {
                    foundRun = true;
                    int start = match.start();
                    int end = match.end();
                    if (start > 0) {
                        // other characters precede this run -> the pending fragment is complete
                        total += TALDesigner.flushFragment(con, line, posStr, seqs, out);
                    }
                    line.append(str, start, end);
                    if (end < str.length()) {
                        // other characters follow this run -> it cannot continue on the next line
                        total += TALDesigner.flushFragment(con, line, posStr, seqs, out);
                    }
                }
                if (!foundRun && str.length() > 0) {
                    // a line made up only of other characters separates fragments as well
                    total += TALDesigner.flushFragment(con, line, posStr, seqs, out);
                }
            }
        } finally {
            // close the file even if reading or filtering failed
            read.close();
        }
        total += TALDesigner.flushFragment(con, line, posStr, seqs, out);

        System.out.println("total length: " + total);
        DataSet ds = new DataSet("", seqs);
        DataSet ds2 = null;
        System.out.println("excluded: " + out.size());
        if (out.size() > 0) {
            ds2 = new DataSet("", out);
        }
        seqs.clear();
        out.clear();
        return new DataSet[]{ds, ds2};
    }

    /**
     * Hands the pending fragment (if any) to {@link #addFilteredfilter} and empties the buffer.
     *
     * @return the number of characters that were flushed, {@code 0} if the buffer was empty
     */
    private static long flushFragment(AlphabetContainer con, StringBuilder pending, String[] posStr, List<Sequence> seqs, List<Sequence> out) throws Exception {
        int flushed = pending.length();
        if (flushed == 0) {
            return 0L;
        }
        TALDesigner.addFilteredfilter(con, pending.toString(), posStr, seqs, out);
        pending.setLength(0);
        return flushed;
    }

    /**
     * Determines, for every positive sequence, the start position at which the TAL of the given
     * individual obtains its highest score.
     *
     * <p>All start positions that leave room for {@code tal.getLength() + 1} symbols are scored
     * with {@link TALgetterDiffSM#getPartialLogScoreFor}; ties keep the left-most position. If no
     * position scores above negative infinity (for instance because the sequence is too short),
     * the entry for that sequence stays {@code 0}.
     *
     * @param talInd the individual whose sequence is scored
     * @param model the model providing the scores
     * @param positives the sequences to scan
     * @return one best start position per sequence of {@code positives}, in the same order
     */
    public static int[] getBestTargetPosition(DiscreteSequenceIndividual talInd, TALgetterDiffSM model, DataSet positives) {
        int[] bestStarts = new int[positives.getNumberOfElements()];
        SimpleDiscreteSequence tal = talInd.getSequence();
        for (int seqIdx = 0; seqIdx < positives.getNumberOfElements(); seqIdx++) {
            Sequence target = positives.getElementAt(seqIdx);
            double bestScore = Double.NEGATIVE_INFINITY;
            for (int start = 0; start < target.getLength() - tal.getLength(); start++) {
                double score = model.getPartialLogScoreFor((Sequence)tal, target, start, 0, tal.getLength() + 1);
                if (score > bestScore) {
                    bestScore = score;
                    bestStarts[seqIdx] = start;
                }
            }
        }
        return bestStarts;
    }
}

