package projects.tals;

import de.jstacs.DataType;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.ReferenceSequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.NonParsableException;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.Parameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.parameters.validation.NumberValidator;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.NumericalResult;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.sequenceScores.statisticalModels.differentiable.AbstractDifferentiableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.trainable.discrete.homogeneous.HomogeneousMM;
import de.jstacs.sequenceScores.statisticalModels.trainable.discrete.homogeneous.parameters.HomMMParameterSet;
import de.jstacs.tools.ui.galaxy.MultilineSimpleParameter;
import de.jstacs.utils.ComparableElement;
import de.jstacs.utils.IntList;
import de.jstacs.utils.Pair;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFConstants;
import java.io.BufferedReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.dom.svg.SVGPathSegConstants;
import org.apache.batik.util.SVGConstants;
import org.apache.fop.pdf.PDFGState;
import org.apache.xmlgraphics.image.loader.spi.ImagePreloader;

/* loaded from: input_file:projects/tals/TBSScanner.class */
public class TBSScanner {

    /* loaded from: input_file:projects/tals/TBSScanner$BackgroundDistribution.class */
    /**
     * Empirical background distribution of model scores, used to turn a score into a p-value.
     *
     * <p>Scores are sampled by training a {@link HomogeneousMM} on the input data, emitting one
     * random sequence from it and scoring every window of that sequence with the model. The top
     * scores of every sampling round are pooled in {@link #bestScores}; the complete sorted score
     * vectors are averaged element-wise over all rounds into {@link #restScores}.
     */
    private static class BackgroundDistribution {
        /** Number of sampling rounds used unless coarse p-values are requested. */
        private static final int r = 10;
        /** Pooled top scores of all sampling rounds, sorted ascending. */
        double[] bestScores;
        /** Element-wise mean of the sorted score vectors of all sampling rounds. */
        double[] restScores;
        /** Mean (over rounds) of the smallest score that still belongs to the top segment. */
        double pivot;
        /** Fraction of the samples of one round that belongs to the top segment. */
        double pivotP;

        /**
         * Samples the background distribution.
         *
         * @param dataSet data the homogeneous Markov model is trained on
         * @param abstractDifferentiableStatisticalModel model that scores the sampled windows
         * @param sequence reference sequence attached to every window; its length determines the window length
         * @param i number of windows scored per sampling round, must be positive
         * @param pVals {@link PVals#COARSE} uses a single round, any other accepted value uses {@code r} rounds
         * @throws IllegalArgumentException if {@code pVals} is {@link PVals#NONE} or {@code i} is not positive
         * @throws Exception if training, sampling or scoring fails
         */
        private BackgroundDistribution(DataSet dataSet, AbstractDifferentiableStatisticalModel abstractDifferentiableStatisticalModel, Sequence sequence, int i, PVals pVals) throws Exception {
            if (pVals == PVals.NONE) {
                throw new IllegalArgumentException("A background distribution is only needed when p-values are computed.");
            }
            if (i <= 0) {
                throw new IllegalArgumentException("The number of sampled scores must be positive but was " + i);
            }
            // NOTE(review): this image-loader constant has no relation to scoring and looks like a
            // decompiler substitution for a plain numeric cap on the top segment - confirm.
            int best = Math.min(ImagePreloader.DEFAULT_PRIORITY, i);
            int rounds = pVals == PVals.COARSE ? 1 : r;
            this.pivot = 0.0d;
            this.restScores = new double[i];
            this.bestScores = new double[best * rounds];
            int offset = 0;
            for (int round = 0; round < rounds; round++) {
                double[] sorted = getSortedScores(dataSet, abstractDifferentiableStatisticalModel, sequence, i);
                // The array is sorted ascending, so the top segment is its tail.
                System.arraycopy(sorted, sorted.length - best, this.bestScores, offset, best);
                offset += best;
                this.pivot += sorted[sorted.length - best];
                for (int k = 0; k < sorted.length; k++) {
                    this.restScores[k] += sorted[k];
                }
            }
            Arrays.sort(this.bestScores);
            this.pivot /= rounds;
            // Floating-point division: int / int would truncate to 0 whenever best < i.
            this.pivotP = best / (double) i;
            for (int k = 0; k < this.restScores.length; k++) {
                this.restScores[k] /= rounds;
            }
        }

        /**
         * Runs one sampling round: trains a homogeneous Markov model on {@code dataSet}, emits a
         * single sequence of length {@code i + sequence.getLength()} and scores its first
         * {@code i} windows of length {@code sequence.getLength() + 1}.
         *
         * @return the {@code i} window scores, sorted ascending
         */
        private double[] getSortedScores(DataSet dataSet, AbstractDifferentiableStatisticalModel abstractDifferentiableStatisticalModel, Sequence sequence, int i) throws Exception {
            // NOTE(review): 4.0 and (byte) 2 are presumably the equivalent sample size and the
            // Markov order of the background model - confirm against HomMMParameterSet.
            HomogeneousMM homogeneousMM = new HomogeneousMM(new HomMMParameterSet(dataSet.getAlphabetContainer(), 4.0d, "", (byte) 2));
            homogeneousMM.train(dataSet);
            Sequence sampled = homogeneousMM.emitDataSet(1, i + sequence.getLength()).getElementAt(0);
            double[] scores = new double[i];
            for (int start = 0; start < i; start++) {
                scores[start] = abstractDifferentiableStatisticalModel.getLogScoreFor(sampled.getSubSequence(start, sequence.getLength() + 1).annotate(true, new ReferenceSequenceAnnotation("seq", sequence, new Result[0])));
            }
            Arrays.sort(scores);
            return scores;
        }

        /**
         * Returns the empirical p-value of a score.
         *
         * <p>Scores up to the pivot are looked up in the averaged full distribution; larger scores
         * are looked up in the pooled top scores and scaled by the weight of the top segment.
         *
         * @param d the score
         * @return the estimated fraction of background scores that are at least {@code d}
         */
        public double getPValue(double d) {
            if (d <= this.pivot) {
                return upperTailFraction(this.restScores, d);
            }
            return this.pivotP * upperTailFraction(this.bestScores, d);
        }

        /**
         * Returns the fraction of entries of an ascending array located at or behind the position
         * that {@link Arrays#binarySearch(double[], double)} reports for {@code score}.
         */
        private static double upperTailFraction(double[] sortedAscending, double score) {
            int idx = Arrays.binarySearch(sortedAscending, score);
            if (idx < 0) {
                // Not found: binarySearch encodes the insertion point as -(point) - 1.
                idx = (-idx) - 1;
            }
            // Floating-point division: int / int would yield only 0 or 1.
            return (sortedAscending.length - idx) / (double) sortedAscending.length;
        }

        /* synthetic */ BackgroundDistribution(DataSet dataSet, AbstractDifferentiableStatisticalModel abstractDifferentiableStatisticalModel, Sequence sequence, int i, PVals pVals, BackgroundDistribution backgroundDistribution) throws Exception {
            // Accessor used by the enclosing class; the trailing argument is ignored.
            this(dataSet, abstractDifferentiableStatisticalModel, sequence, i, pVals);
        }
    }

    /* loaded from: input_file:projects/tals/TBSScanner$GeneLinkResult.class */
    /**
     * A {@link CategoricalResult} whose value is rendered with the first recognised gene
     * identifier wrapped in an HTML link.
     */
    public static class GeneLinkResult extends CategoricalResult {
        /**
         * Rows of {identifier pattern, URL template}; {@code $$$} in the template is replaced by
         * the matched identifier. Rows are tried in order and the first match wins.
         */
        private static Object[][] map = {
            new Object[]{Pattern.compile("Os[0-9]+[a-zA-Z]+[0-9]+(\\.[0-9]+)?"), "http://rice.plantbiology.msu.edu/cgi-bin/ORF_infopage.cgi?orf=LOC_$$$"},
            new Object[]{Pattern.compile("AT[0-9]+[a-zA-Z]+[0-9]+(\\.[0-9]+)"), "http://www.arabidopsis.org/servlets/TairObject?name=$$$&type=gene"},
            new Object[]{Pattern.compile("AT[0-9]+[a-zA-Z]+[0-9]+"), "http://www.arabidopsis.org/servlets/TairObject?name=$$$&type=locus"}
        };

        /** Creates the result from the three strings that are handed on to {@link CategoricalResult}. */
        public GeneLinkResult(String str, String str2, String str3) {
            super(str, str2, str3);
        }

        /** Restores the result from its serialized representation. */
        public GeneLinkResult(StringBuffer stringBuffer) throws NonParsableException {
            super(stringBuffer);
        }

        /**
         * Returns the stored value; if it contains a known gene identifier, the first occurrence
         * is turned into an anchor element that opens in a new window.
         */
        @Override
        public String getValue() {
            String text = super.getValue().toString();
            for (Object[] row : map) {
                Matcher matcher = ((Pattern) row[0]).matcher(text);
                if (!matcher.find()) {
                    continue;
                }
                int from = matcher.start();
                int to = matcher.end();
                String identifier = text.substring(from, to);
                String url = ((String) row[1]).replaceAll("\\$\\$\\$", identifier);
                StringBuilder html = new StringBuilder();
                html.append(text.substring(0, from));
                html.append("<a href=\"").append(url).append("\" target=\"_blank\">");
                html.append(identifier).append("</a>");
                html.append(text.substring(to));
                return html.toString();
            }
            return text;
        }
    }

    /* loaded from: input_file:projects/tals/TBSScanner$PVals.class */
    /** Modes for the computation of p-values of predicted target sites. */
    public enum PVals {
        /** No p-values are computed. */
        NONE,
        /** Coarse p-values. */
        COARSE,
        /** Fine-grained p-values. */
        FINE;

        /**
         * Returns all constants in declaration order.
         *
         * @return a fresh array that the caller may modify freely
         */
        public static PVals[] valuesCustom() {
            return values().clone();
        }
    }

    /* loaded from: input_file:projects/tals/TBSScanner$ResultList.class */
    /**
     * Bounded list that keeps the best-scoring results, ordered from best to worst.
     *
     * <p>Entries are stored with the negated score as weight, so sorting them ascending puts the
     * highest score first. Once the list is full, a new result only replaces the current worst
     * entry if its score is strictly higher.
     */
    public static class ResultList {
        /** Entries in ascending weight order; unused slots at the end are {@code null}. */
        private ComparableElement<ResultSet, Double>[] list;
        /** Index of the next free slot, or of the last (worst) slot once the list is full. */
        private int curr = 0;

        /**
         * Creates an empty list.
         *
         * @param i the maximum number of results kept, at least 1
         * @throws IllegalArgumentException if {@code i} is smaller than 1
         */
        @SuppressWarnings("unchecked")
        public ResultList(int i) {
            if (i < 1) {
                // Every accessor reads list[curr] or list[0], which needs at least one slot.
                throw new IllegalArgumentException("The capacity of a ResultList must be at least 1 but was " + i);
            }
            this.list = new ComparableElement[i];
        }

        /**
         * Tells whether a result with the given score would be stored by {@link #add}.
         *
         * @param d the score
         * @return {@code true} if there is a free slot or the score beats the current worst entry
         */
        public boolean better(double d) {
            ComparableElement<ResultSet, Double> slot = this.list[this.curr];
            return slot == null || (-d) < slot.getWeight().doubleValue();
        }

        /**
         * Adds a result if there is room for it or if it beats the current worst entry.
         *
         * @param resultSet the result
         * @param d the score of the result
         */
        public void add(ResultSet resultSet, double d) {
            boolean full = this.list[this.curr] != null;
            if (full) {
                if (better(d)) {
                    // Overwrite the worst entry and restore the order.
                    this.list[this.curr] = new ComparableElement<>(resultSet, Double.valueOf(-d));
                    Arrays.sort(this.list);
                }
                return;
            }
            this.list[this.curr] = new ComparableElement<>(resultSet, Double.valueOf(-d));
            if (this.curr >= this.list.length - 1) {
                // The last free slot has just been filled; curr stays on the final index.
                Arrays.sort(this.list);
            } else {
                this.curr++;
                // Only the occupied prefix may be sorted, the remainder is still null.
                Arrays.sort(this.list, 0, this.curr);
            }
        }

        /**
         * Converts the stored results into DNA sequences that carry the "ID", "Position" and
         * "Score" results as sequence annotations.
         *
         * @return the binding sites, or {@code null} if the list is empty
         * @throws Exception if a sequence cannot be created
         */
        public DataSet getBindingSites() throws Exception {
            int count = getNumberOfResults();
            if (count == 0) {
                return null;
            }
            Sequence[] sites = new Sequence[count];
            for (int i = 0; i < sites.length; i++) {
                ResultSet entry = this.list[i].getElement();
                SequenceAnnotation[] annotations = {
                    new SequenceAnnotation("ID", (String) entry.getResultForName("ID").getValue(), new Result[0][]),
                    new SequenceAnnotation("Position", entry.getResultForName("Position").getValue().toString(), new Result[0][]),
                    new SequenceAnnotation("Score", entry.getResultForName("Score").getValue().toString(), new Result[0][])
                };
                sites[i] = Sequence.create(DNAAlphabetContainer.SINGLETON, annotations, (String) entry.getResultForName("Sequence").getValue(), "");
            }
            return new DataSet("binding sites", sites);
        }

        /**
         * Returns the stored results ordered from best to worst.
         *
         * @return a new array holding the result sets
         */
        public ResultSet[] toArray() {
            ResultSet[] results = new ResultSet[getNumberOfResults()];
            for (int i = 0; i < results.length; i++) {
                results[i] = this.list[i].getElement();
            }
            return results;
        }

        /**
         * Returns the highest stored score.
         *
         * @return the best score, or {@link Double#NEGATIVE_INFINITY} if the list is empty
         */
        public double getBestScore() {
            if (this.list[0] == null) {
                return Double.NEGATIVE_INFINITY;
            }
            return -this.list[0].getWeight().doubleValue();
        }

        /**
         * Returns the lowest stored score.
         *
         * @return the worst score, or {@link Double#NEGATIVE_INFINITY} if the list is empty
         */
        public double getWorstScore() {
            if (this.curr == 0 && this.list[this.curr] == null) {
                return Double.NEGATIVE_INFINITY;
            }
            // While slots are free, curr points behind the last entry; once full, at it.
            int last = this.list[this.curr] == null ? this.curr - 1 : this.curr;
            return -this.list[last].getWeight().doubleValue();
        }

        /**
         * Returns the number of results currently stored.
         *
         * @return the number of occupied slots
         */
        public int getNumberOfResults() {
            return this.list[this.curr] == null ? this.curr : this.curr + 1;
        }
    }

    /* loaded from: input_file:projects/tals/TBSScanner$TBSScannerParameterSet.class */
    /**
     * Parameters of the scanner: the input sequences, the RVD sequence, the offsets ignored at
     * both sequence ends, the maximum number of reported sites and the p-value mode.
     */
    public static class TBSScannerParameterSet extends ParameterSet {
        private static final String INPUT_SEQUENCES = "Input sequences";
        private static final String UPLOADED_FILE = "Use a previously uploaded file";
        private static final String P_VALUE_MODE = "Computation of p-Values";
        private static final String FINE_P_VALUES = "Fine-grained p-Values (slower but more accurate)";

        /** Creates the parameter set with its default values. */
        public TBSScannerParameterSet() throws Exception {
            String fastaComment = "The sequences to scan for TAL effector target sites, FastA format";
            ParameterSet fromFile = new SimpleParameterSet(new FileParameter("FastA file", fastaComment, "fasta", true));
            ParameterSet pasted = new SimpleParameterSet(new MultilineSimpleParameter("FastA sequences", fastaComment, true));
            this.parameters.add(new SelectionParameter(
                    DataType.PARAMETERSET,
                    new String[]{UPLOADED_FILE, "Paste sequences in FastA format"},
                    new ParameterSet[]{fromFile, pasted},
                    INPUT_SEQUENCES,
                    "You can either use a previously uploaded file (see task &quot;GetData&quot; -&gt; &quot;Upload File&quot;) or paste in sequences in FastA format",
                    true));

            this.parameters.add(new MultilineSimpleParameter("RVD sequence", "Sequence of RVDs, seperated by '-'", true, (Object) "NI-HD-HD-NG-NN-NK-NK"));
            this.parameters.add(new SimpleParameter(DataType.INT, "Upstream offset", "Number of positions ignored at 5' end of each sequence", true, (Object) 0));
            this.parameters.add(new SimpleParameter(DataType.INT, "Downstream offset", "Number of positions ignored at 3' end of each sequence", true, (Object) 0));
            this.parameters.add(new SimpleParameter(DataType.INT, "Maximum number of target sites", "Limits the total number of reported target sites in all input sequences, ranked by their score.", true, new NumberValidator(1, 10000), 100));

            // The same threshold parameter instance backs both the coarse and the fine mode.
            SimpleParameter threshold = new SimpleParameter(DataType.DOUBLE, "p-Value", "Filter the reported hits by a maximum p-Value. A value of 0 or 1 switches off the filter.", true, new NumberValidator(Double.valueOf(0.0d), Double.valueOf(1.0d)), Double.valueOf(1.0E-6d));
            ParameterSet noPValues = new SimpleParameterSet(new Parameter[0]);
            ParameterSet coarse = new SimpleParameterSet(threshold);
            ParameterSet fine = new SimpleParameterSet(threshold);
            SelectionParameter mode = new SelectionParameter(
                    DataType.PARAMETERSET,
                    new String[]{"No p-Values (fastest)", "Coarse p-Values (faster but less accurate)", FINE_P_VALUES},
                    new ParameterSet[]{noPValues, coarse, fine},
                    P_VALUE_MODE,
                    "Mode to compute p-values for predicted target sites. If no p-values are computed, filtering for p-values is not available.",
                    true);
            mode.setDefault(FINE_P_VALUES);
            this.parameters.add(mode);
        }

        /** Returns all parameters of this set as an array. */
        public Parameter[] getAllParameters() {
            return (Parameter[]) this.parameters.toArray(new Parameter[0]);
        }

        /** Inserts a parameter at the given index. */
        public void addParameter(int i, Parameter parameter) {
            this.parameters.add(i, parameter);
        }

        /**
         * Maps the selected p-value mode onto {@link PVals}.
         *
         * @throws RuntimeException if the selection index is none of 0, 1 or 2
         */
        public PVals computePValues() {
            int selected = ((SelectionParameter) this.parameters.get(P_VALUE_MODE)).getSelected();
            switch (selected) {
                case 0:
                    return PVals.NONE;
                case 1:
                    return PVals.COARSE;
                case 2:
                    return PVals.FINE;
                default:
                    throw new RuntimeException("Computation of p-Values is required parameter.");
            }
        }

        /** Returns the value of the "RVD sequence" parameter. */
        public String getTALSequence() {
            return (String) this.parameters.get("RVD sequence").getValue();
        }

        /** Selects the uploaded-file input option and sets its file to the given path. */
        public void setInputPath(String str) throws Exception {
            SelectionParameter input = (SelectionParameter) this.parameters.get(INPUT_SEQUENCES);
            input.setValue(UPLOADED_FILE);
            // Looked up again after setValue, as the selection now points at the file option.
            SimpleParameterSet chosen = (SimpleParameterSet) ((SelectionParameter) this.parameters.get(INPUT_SEQUENCES)).getValue();
            FileParameter file = (FileParameter) chosen.getParameterAt(0);
            file.setValue(new FileParameter.FileRepresentation(str));
        }

        /**
         * Reads the input sequences from the selected source.
         *
         * <p>If the first option is selected, the parameter value is handed to the extractor as
         * is; otherwise the value is treated as the FastA text itself.
         */
        public DataSet getInputSequences() throws Exception {
            SelectionParameter input = (SelectionParameter) this.parameters.get(INPUT_SEQUENCES);
            boolean firstOption = input.getSelected() == 0;
            AlphabetContainer alphabet = inputAlphabet();
            String value = (String) ((SimpleParameterSet) ((SelectionParameter) this.parameters.get(INPUT_SEQUENCES)).getValue()).getParameterAt(0).getValue();
            if (firstOption) {
                return new DataSet(alphabet, new SparseStringExtractor(value, '>', (SequenceAnnotationParser) new SimpleSequenceAnnotationParser()));
            }
            Reader reader = new BufferedReader(new StringReader(value));
            return new DataSet(alphabet, new SparseStringExtractor(reader, '>', (String) null, (SequenceAnnotationParser) new SimpleSequenceAnnotationParser()));
        }

        /**
         * Builds the alphabet used for parsing the input sequences.
         */
        private static AlphabetContainer inputAlphabet() throws Exception {
            // NOTE(review): the SVG/PDF constants below presumably stand for single-letter
            // nucleotide codes substituted by the decompiler - confirm their values.
            return new AlphabetContainer(new DiscreteAlphabet(true, "A", "C", "G", "T", "N", "W", SVGPathSegConstants.PATHSEG_CURVETO_CUBIC_SMOOTH_ABS_LETTER, "M", "K", "R", "Y", SVGConstants.SVG_B_VALUE, PDFGState.GSTATE_DASH_PATTERN, "H", "V"));
        }

        /** Returns the value of the "Maximum number of target sites" parameter. */
        public int getN() {
            return intValueOf("Maximum number of target sites");
        }

        /**
         * Returns the p-value threshold of the selected mode.
         *
         * @return the threshold, or 0 if the selected mode has no parameters
         */
        public double getPValue() {
            ParameterSet selectedMode = (ParameterSet) this.parameters.get(P_VALUE_MODE).getValue();
            boolean hasThreshold = selectedMode.getNumberOfParameters() != 0;
            if (hasThreshold) {
                return ((Double) selectedMode.getParameterAt(0).getValue()).doubleValue();
            }
            return 0.0d;
        }

        /** Returns the value of the "Upstream offset" parameter. */
        public int getFirstPosition() {
            return intValueOf("Upstream offset");
        }

        /** Returns the value of the "Downstream offset" parameter. */
        public int getDownstreamOffset() {
            return intValueOf("Downstream offset");
        }

        /** Reads the integer value of the parameter with the given name. */
        private int intValueOf(String name) {
            return ((Integer) this.parameters.get(name).getValue()).intValue();
        }
    }

    /* JADX WARN: Type inference failed for: r3v44, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /* JADX WARN: Type inference failed for: r3v47, types: [de.jstacs.results.Result[], de.jstacs.results.Result[][]] */
    /**
     * Scans all input sequences for target sites of the configured RVD sequence and collects the
     * best-scoring ones.
     *
     * <p>The input is first split into fragments by {@link #preprocess(DataSet)}. Every window of
     * every fragment is scored; a window is reported if it would enter the result list, passes
     * the p-value filter and respects the upstream and downstream offsets.
     *
     * <p>If required parameters are missing, a message is printed and the JVM is terminated.
     *
     * @param tALgetterDiffSM the model used for scoring
     * @param tBSScannerParameterSet the scanner parameters
     * @param dataSet the sequences to scan
     * @return two lists with the same hits; in the second one the "ID" entry is a {@link GeneLinkResult}
     * @throws Exception if preprocessing, background sampling or scoring fails
     */
    public static ResultList[] scan(TALgetterDiffSM tALgetterDiffSM, TBSScannerParameterSet tBSScannerParameterSet, DataSet dataSet) throws Exception {
        if (!tBSScannerParameterSet.hasDefaultOrIsSet()) {
            System.err.println("Some of the required parameters are not specified.");
            System.exit(1);
        }
        Pair<int[][], DataSet> preprocessed = preprocess(dataSet);
        // offsets[0][k]: start of fragment k in its source sequence; offsets[1][k]: distance of its end to the source end.
        int[][] offsets = preprocessed.getFirstElement();
        DataSet fragments = preprocessed.getSecondElement();
        // NOTE(review): the VCF/SAM constants presumably stand for two-letter RVD symbols
        // substituted by the decompiler - confirm their values.
        Sequence rvds = Sequence.create(new AlphabetContainer(new DiscreteAlphabet(true, "NI", "NG", "NN", VCFConstants.SAMPLE_NUMBER_KEY, "N*", "ND", "NK", "NC", "NV", "NA", "NH", "HD", "HG", "HA", "H*", "HH", "HI", "HN", "S*", SAMSequenceRecord.SEQUENCE_NAME_TAG, "SS", "IG", "YG", "NP", "NT", "IS")), tBSScannerParameterSet.getTALSequence(), "-");
        double size = getSize(fragments, rvds);
        if (size > 2.0E8d) {
            // NOTE(review): this only prints a message and the scan continues - confirm that
            // oversized input is not supposed to be rejected here.
            System.err.println("Data set too large. Currently at most 90 Mb allowed");
        }
        int firstPosition = tBSScannerParameterSet.getFirstPosition();
        int downstreamOffset = tBSScannerParameterSet.getDownstreamOffset();
        ResultList plainResults = new ResultList(tBSScannerParameterSet.getN());
        ResultList linkedResults = new ResultList(tBSScannerParameterSet.getN());
        PVals mode = tBSScannerParameterSet.computePValues();
        boolean withPValues = mode != PVals.NONE;
        BackgroundDistribution background = withPValues ? new BackgroundDistribution(fragments, tALgetterDiffSM, rvds, Math.max(10000000, (int) size), mode, null) : null;
        double maxPValue = tBSScannerParameterSet.getPValue();
        int rvdCount = rvds.getLength();
        // A site is one position longer than the RVD sequence.
        int siteLength = rvdCount + 1;
        for (int i = 0; i < fragments.getNumberOfElements(); i++) {
            Sequence fragment = fragments.getElementAt(i);
            int fragmentLength = fragment.getLength();
            String id = ((String) fragment.getSequenceAnnotationByType("unparsed comment line", 0).getResultForName("unparsed comment").getValue()).trim();
            for (int start = 0; start < fragmentLength - rvdCount; start++) {
                Sequence site = fragment.getSubSequence(start, siteLength).annotate(true, fragment.getAnnotation()).annotate(true, new ReferenceSequenceAnnotation("seq", rvds, new Result[0]));
                double score = tALgetterDiffSM.getLogScoreFor(site);
                if (!plainResults.better(score)) {
                    continue;
                }
                double pValue = background != null ? background.getPValue(score) : 0.0d;
                // A threshold of 0 disables the filter.
                if (maxPValue != 0.0d && maxPValue < pValue) {
                    continue;
                }
                double eValue = pValue * size;
                int distanceToEnd = (fragmentLength - (start + siteLength)) + offsets[1][i];
                if (start < firstPosition || distanceToEnd < downstreamOffset) {
                    continue;
                }
                String matchString = tALgetterDiffSM.getMatchString(site);
                Result[] row = new Result[8];
                row[0] = new CategoricalResult("ID", "", id);
                row[1] = new NumericalResult("Position", "", start + offsets[0][i]);
                row[2] = new NumericalResult("Distance to end", "", distanceToEnd);
                row[3] = new CategoricalResult("Sequence", "", site.toString());
                row[4] = new CategoricalResult("Matches", "", matchString);
                row[5] = new NumericalResult("Score", "", score);
                row[6] = withPValues ? new NumericalResult("p-value", "", pValue) : new CategoricalResult("p-value", "", "NA");
                row[7] = withPValues ? new NumericalResult("E-value", "", eValue) : new CategoricalResult("E-value", "", "NA");
                plainResults.add(new ResultSet(new Result[][]{row}), score);
                // NOTE(review): the row array is reused for the linked variant; this relies on
                // ResultSet not retaining the array it was given - confirm.
                row[0] = new GeneLinkResult("ID", "", id);
                linkedResults.add(new ResultSet(new Result[][]{row}), score);
            }
        }
        return new ResultList[]{plainResults, linkedResults};
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Splits every sequence into its maximal runs of A, C, G and T (case-insensitive) and turns
     * each run into a DNA sequence that carries the annotation of its source sequence.
     *
     * @param dataSet the sequences to split
     * @return a pair of (a) two parallel arrays with one entry per fragment - index 0 holds the
     *         start of the fragment in its source sequence, index 1 the number of positions
     *         between the end of the fragment and the end of the source sequence - and (b) the
     *         data set of fragments
     * @throws Exception if a fragment cannot be created
     */
    public static Pair<int[][], DataSet> preprocess(DataSet dataSet) throws Exception {
        Pattern acgtRun = Pattern.compile("[ACGT]+", Pattern.CASE_INSENSITIVE);
        LinkedList<Sequence> fragments = new LinkedList<>();
        IntList starts = new IntList();
        IntList distancesToEnd = new IntList();
        for (int i = 0; i < dataSet.getNumberOfElements(); i++) {
            Sequence source = dataSet.getElementAt(i);
            String text = source.toString();
            Matcher matcher = acgtRun.matcher(text);
            while (matcher.find()) {
                int start = matcher.start();
                int end = matcher.end();
                fragments.add(Sequence.create(DNAAlphabetContainer.SINGLETON, source.getAnnotation(), text.substring(start, end), ""));
                starts.add(start);
                distancesToEnd.add(source.getLength() - end);
            }
        }
        int[][] offsets = {starts.toArray(), distancesToEnd.toArray()};
        return new Pair<>(offsets, new DataSet("", fragments.toArray(new Sequence[0])));
    }

    /**
     * Sums, over all sequences of the data set, the sequence length minus the length of
     * {@code sequence}, counting sequences that are not longer than {@code sequence} as 0.
     *
     * <p>Without the lower bound, short sequences would subtract from the total and could even
     * make it negative.
     *
     * @param dataSet the sequences
     * @param sequence the sequence whose length is subtracted from every sequence length
     * @return the non-negative total
     */
    private static double getSize(DataSet dataSet, Sequence sequence) {
        int subtracted = sequence.getLength();
        double total = 0.0d;
        for (int i = 0; i < dataSet.getNumberOfElements(); i++) {
            total += Math.max(0, dataSet.getElementAt(i).getLength() - subtracted);
        }
        return total;
    }

    /**
     * Collects all sequences of a data set in a hash set.
     *
     * @param dataSet the sequences
     * @return a new set containing every element of the data set
     */
    private static HashSet<Sequence> makeHash(DataSet dataSet) {
        HashSet<Sequence> sequences = new HashSet<>();
        int index = 0;
        while (index < dataSet.getNumberOfElements()) {
            sequences.add(dataSet.getElementAt(index));
            index++;
        }
        return sequences;
    }
}
