package projects.tals.pbmselex;

import de.jstacs.classifiers.performanceMeasures.AucPR;
import de.jstacs.classifiers.performanceMeasures.AucROC;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotation;
import de.jstacs.io.FileManager;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.io.XMLParser;
import de.jstacs.results.Result;
import de.jstacs.sequenceScores.statisticalModels.differentiable.DifferentiableStatisticalModel;
import de.jstacs.utils.DoubleList;
import de.jstacs.utils.Normalisation;
import de.jstacs.utils.ToolBox;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import org.apache.batik.util.XBLConstants;
import org.apache.batik.util.XMLConstants;
import projects.tals.ScanForTBSCLI;
import projects.tals.TALgetterDiffSM;

/* loaded from: input_file:projects/tals/pbmselex/ExtractAndScoreSELEX.class */
/**
 * Command-line tool that, for every read of a FastA file, extracts the best-scoring
 * window under a TALgetter model and compares the abundance of the extracted windows
 * with their model scores.
 *
 * <p>Usage: {@code ExtractAndScoreSELEX <reads.fa> [<rvds.fa>]}
 * <ul>
 * <li>{@code reads.fa}: FastA file with the reads. The file name up to {@code _FLAG}
 * (directories stripped) is used as the key into the RVD file.</li>
 * <li>{@code rvds.fa}: optional FastA file mapping names to RVD strings (RVDs separated
 * by {@code -}); defaults to {@link #DEFAULT_RVD_FASTA}.</li>
 * </ul>
 *
 * <p>Output: one tab-separated summary line on stdout (input path, Pearson correlation of
 * weight vs. score, Pearson correlation of log-weight vs. score, AUC-ROC, AUC-PR, number of
 * positives, number of all extracted windows) and a file {@code <reads.fa>_extracted.txt}
 * with one line per distinct extracted window (window, log-weight, score, RVD sequence).
 */
public class ExtractAndScoreSELEX {

    /** RVD FastA file used when no second command-line argument is given. */
    private static final String DEFAULT_RVD_FASTA = "/Users/dev/Desktop/TAL-Chips/TALgetter2/SELEX/TALE_HT_SELEX/fa/rvds.fa";

    /** Number of leading positions of each read that are skipped before scanning. */
    private static final int LEADING_SKIP = 15;

    /** A window counts as "near-maximal" if its normalised score exceeds this fraction of the best one. */
    private static final double NEAR_MAX_FRACTION = 0.1d;

    /** The positive class is widened to contain at least this many windows (if that many exist). */
    private static final int MIN_POSITIVES = 50;

    /**
     * Runs the extraction and evaluation.
     *
     * @param strArr {@code strArr[0]}: path of the reads FastA file (required);
     *               {@code strArr[1]}: path of the RVD FastA file (optional)
     * @throws IllegalArgumentException if no reads file is given or the RVD file has no entry
     *                                  for the name derived from the reads file
     * @throws IllegalStateException    if no window could be extracted from any read
     * @throws Exception                if reading, model loading, scoring or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException("Usage: ExtractAndScoreSELEX <reads.fa> [<rvds.fa>]");
        }
        String readsPath = strArr[0];
        String rvdPath = strArr.length > 1 ? strArr[1] : DEFAULT_RVD_FASTA;

        HashMap<String, String> rvdsByName = readRvdFasta(rvdPath);

        DifferentiableStatisticalModel model = (DifferentiableStatisticalModel) XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSCLI.class.getClassLoader().getResourceAsStream("projects/tals/talfinder_obg2_hyp_bg.xml")), "model");
        DataSet reads = new DataSet(DNAAlphabetContainer.SINGLETON, new SparseStringExtractor(readsPath, '>'), "", 0, 0.01d);

        // The RVD entry is looked up by the reads file name without directories and without the "_FLAG..." suffix.
        String taleName = readsPath.replaceAll(".*/", "").replaceAll("_FLAG.*", "");
        String rvdString = rvdsByName.get(taleName);
        if (rvdString == null) {
            throw new IllegalArgumentException("No RVD sequence named '" + taleName + "' found in " + rvdPath);
        }
        Sequence rvds = Sequence.create(((TALgetterDiffSM) model).getRVDAlphabet(), rvdString, "-");
        // Scored windows are one position longer than the RVD sequence.
        int windowLength = rvds.getLength() + 1;

        // window string -> { accumulated weight, model score of the window }
        HashMap<String, double[]> statsByWindow = new HashMap<String, double[]>();
        for (int r = 0; r < reads.getNumberOfElements(); r++) {
            Sequence read = reads.getElementAt(r);
            Sequence insert = read.getSubSequence(LEADING_SKIP, read.getLength() - LEADING_SKIP);

            // Scores are stored pairwise: index 2*start is the forward window at 'start',
            // index 2*start+1 is its reverse complement.
            DoubleList scores = new DoubleList();
            for (int start = 0; start < insert.getLength() - rvds.getLength(); start++) {
                Sequence forward = insert.getSubSequence(start, windowLength).annotate(false, new ReferenceSequenceAnnotation("ref", rvds, new Result[0]));
                scores.add(model.getLogScoreFor(forward));
                scores.add(model.getLogScoreFor(forward.reverseComplement().annotate(false, new ReferenceSequenceAnnotation("ref", rvds, new Result[0]))));
            }
            if (scores.length() == 0) {
                // Read too short to hold a single window.
                continue;
            }

            double bestScore = scores.max(0, scores.length());
            int bestIndex = scores.getMaxIndex();

            // Count how many windows are close to the best one after normalisation;
            // the array is normalised in place.
            double[] normalised = scores.toArray();
            Normalisation.logSumNormalisation(normalised);
            double cutoff = normalised[bestIndex] * NEAR_MAX_FRACTION;
            double nearMaxCount = 0.0d;
            for (double value : normalised) {
                if (value > cutoff) {
                    nearMaxCount += 1.0d;
                }
            }

            // bestIndex / 2 is the window start for both strands (integer division).
            Sequence bestWindow = insert.getSubSequence(bestIndex / 2, windowLength);
            String windowKey = bestIndex % 2 == 0 ? bestWindow.toString() : bestWindow.reverseComplement().toString();

            // Every read contributes 1/nearMaxCount to its window. The previous code added
            // nearMaxCount itself for all but the first occurrence, which was inconsistent with
            // the initial value and up-weighted ambiguous reads.
            // NOTE(review): this changes numeric output compared to earlier runs.
            double weight = 1.0d / nearMaxCount;
            double[] stats = statsByWindow.get(windowKey);
            if (stats == null) {
                statsByWindow.put(windowKey, new double[]{weight, bestScore});
            } else {
                stats[0] += weight;
            }
        }

        if (statsByWindow.isEmpty()) {
            throw new IllegalStateException("No window could be extracted from any read in " + readsPath);
        }

        int n = statsByWindow.size();
        double[] weights = new double[n];
        double[] modelScores = new double[n];
        Sequence[] windows = new Sequence[n];
        int idx = 0;
        for (java.util.Map.Entry<String, double[]> entry : statsByWindow.entrySet()) {
            double[] stats = entry.getValue();
            weights[idx] = stats[0];
            modelScores[idx] = stats[1];
            windows[idx] = Sequence.create(DNAAlphabetContainer.SINGLETON, entry.getKey());
            idx++;
        }

        double correlation = ToolBox.pearsonCorrelation(weights, modelScores);
        // From here on 'weights' holds log-weights.
        for (int i = 0; i < weights.length; i++) {
            weights[i] = Math.log(weights[i]);
        }
        double logCorrelation = ToolBox.pearsonCorrelation(weights, modelScores);

        // Positives: log-weight at least mean + 2 sd, but the threshold is lowered so that at
        // least MIN_POSITIVES windows qualify. With fewer windows than that the cap is skipped
        // (the previous code indexed out of bounds in that case).
        double threshold = ToolBox.mean(weights) + (2.0d * ToolBox.sd(0, weights.length, weights));
        double[] sortedWeights = weights.clone();
        Arrays.sort(sortedWeights);
        if (sortedWeights.length >= MIN_POSITIVES && threshold > sortedWeights[sortedWeights.length - MIN_POSITIVES]) {
            threshold = sortedWeights[sortedWeights.length - MIN_POSITIVES];
        }

        DoubleList positiveScores = new DoubleList();
        DoubleList negativeScores = new DoubleList();
        for (int i = 0; i < weights.length; i++) {
            if (weights[i] >= threshold) {
                positiveScores.add(modelScores[i]);
            } else {
                negativeScores.add(modelScores[i]);
            }
        }
        double[] positives = positiveScores.toArray();
        Arrays.sort(positives);
        double[] negatives = negativeScores.toArray();
        Arrays.sort(negatives);

        double aucRoc = ((Double) new AucROC().compute(positives, negatives).getResultAt(0).getValue()).doubleValue();
        double aucPr = ((Double) new AucPR().compute(positives, negatives).getResultAt(0).getValue()).doubleValue();
        System.out.println(readsPath + "\t" + correlation + "\t" + logCorrelation + "\t" + aucRoc + "\t" + aucPr + "\t" + positives.length + "\t" + (positives.length + negatives.length));

        String rvdColumn = rvds.toString("-", 0, rvds.getLength());
        try (PrintWriter out = new PrintWriter(readsPath + "_extracted.txt")) {
            for (int i = 0; i < weights.length; i++) {
                out.println(windows[i] + "\t" + weights[i] + "\t" + modelScores[i] + "\t" + rvdColumn);
            }
        }
    }

    /**
     * Reads a FastA-like file that maps names to RVD strings.
     *
     * <p>A line starting with {@code >} sets the current name (text after the {@code >},
     * trimmed); every other non-blank line is stored, trimmed, as the value of the current
     * name, replacing any earlier value for that name. Blank lines and lines before the first
     * header are ignored.
     *
     * @param path path of the file to read
     * @return map from name to RVD string
     * @throws java.io.IOException if the file cannot be read
     */
    private static HashMap<String, String> readRvdFasta(String path) throws java.io.IOException {
        HashMap<String, String> rvdsByName = new HashMap<String, String>();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String currentName = null;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(">")) {
                    currentName = line.substring(1).trim();
                    continue;
                }
                String content = line.trim();
                // A blank line must not wipe out the record read so far.
                if (currentName != null && !content.isEmpty()) {
                    rvdsByName.put(currentName, content);
                }
            }
        }
        return rvdsByName;
    }
}
