package projects.methyl;

import cern.colt.matrix.impl.AbstractFormatter;
import de.jstacs.classifiers.differentiableSequenceScoreBased.gendismix.GenDisMixClassifier;
import de.jstacs.data.DataSet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.parameters.FileParameter;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.sequenceScores.statisticalModels.differentiable.DifferentiableStatisticalModel;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.tools.ui.cli.CLI;
import de.jstacs.utils.DoubleList;
import de.jstacs.utils.Normalisation;
import de.jstacs.utils.SafeOutputStream;
import htsjdk.samtools.fastq.FastqConstants;
import java.io.File;
import java.io.FileOutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Date;
import java.util.LinkedList;
import projects.dimont.ThresholdedStrandChIPper;

/* loaded from: input_file:projects/methyl/MotifScanningTool.class */
/**
 * Command-line tool that scores every input sequence with a motif model on
 * both strands and writes one tab-separated line per sequence.
 *
 * <p>The motif model is taken from a {@link GenDisMixClassifier} that is
 * deserialized from the XML supplied as the second tool parameter; the
 * sequences come from the FastA content supplied as the first tool parameter.
 */
public class MotifScanningTool implements JstacsTool {

    /** Strand symbol written when the best match lies on the input sequence itself. */
    private static final String FORWARD_STRAND = "+";

    /** Strand symbol written when the best match lies on the reverse complement. */
    private static final String REVERSE_STRAND = "-";

    /**
     * Runs this tool through the Jstacs command-line interface.
     *
     * @param strArr the command-line arguments, passed on to {@link CLI#run}
     * @throws Exception if the command-line interface fails
     */
    public static void main(String[] strArr) throws Exception {
        new CLI(new MotifScanningTool()).run(strArr);
    }

    /**
     * Declares the two required file parameters of this tool: the input
     * sequences (index 0) and the model XML (index 1).
     *
     * @return the parameter set, named after {@link #getShortName()}
     */
    @Override // de.jstacs.tools.JstacsTool
    public ToolParameterSet getToolParameters() {
        LinkedList parameters = new LinkedList();
        parameters.add(new FileParameter("Input sequences", "Input sequences in FastA format", "fasta,fa,fas", true));
        parameters.add(new FileParameter("Model", "Model XML as output by Methyl SlimDimont", "xml", true));
        return new ToolParameterSet(getShortName(), parameters);
    }

    /**
     * Scans all input sequences with the motif model and collects the results
     * in a temporary TSV file.
     *
     * <p>For each sequence, every window of motif length is scored on the
     * sequence and on its reverse complement. One line is written with the
     * columns: 1-based sequence index, 1-based start of the best window
     * (expressed in coordinates of the input sequence), strand, best log score,
     * log-sum of all window scores minus {@code log(2 * sequence length)}, the
     * best-scoring window as text, and the "unparsed comment line" annotation
     * of the sequence.
     *
     * @param toolParameterSet the parameters; index 0 holds the sequences and
     *        index 1 holds the classifier XML
     * @param protocol not used by this tool
     * @param progressUpdater receives progress values between 0 and 1
     * @param i not used by this tool
     * @return a tool result holding a single text result backed by the
     *         temporary output file
     * @throws Exception if the model or the sequences cannot be loaded, or if
     *         writing the output fails
     */
    @Override // de.jstacs.tools.JstacsTool
    public ToolResult run(ToolParameterSet toolParameterSet, Protocol protocol, ProgressUpdater progressUpdater, int i) throws Exception {
        progressUpdater.setLast(1.0d);
        progressUpdater.setCurrent(0.0d);

        String modelXml = ((FileParameter) toolParameterSet.getParameterAt(1)).getFileContents().getContent();
        GenDisMixClassifier classifier = new GenDisMixClassifier(new StringBuffer(modelXml));

        String fastaContent = ((FileParameter) toolParameterSet.getParameterAt(0)).getFileContents().getContent();
        DataSet sequences = new DataSet(classifier.getAlphabetContainer(), new SparseStringExtractor((Reader) new StringReader(fastaContent), '>', "", (SequenceAnnotationParser) new SimpleSequenceAnnotationParser()));

        DifferentiableStatisticalModel motif = ((ThresholdedStrandChIPper) classifier.getDifferentiableSequenceScore(0)).getFunction(0);
        int motifLength = motif.getLength();

        File outputFile = File.createTempFile("dimontscan", "_dgs.temp");
        outputFile.deleteOnExit();

        SafeOutputStream output = SafeOutputStream.getSafeOutputStream(new FileOutputStream(outputFile));
        try {
            progressUpdater.setCurrent(0.2d);
            int sequenceCount = sequences.getNumberOfElements();
            // Kept as a double so that the progress fraction below is not an integer division.
            double total = sequenceCount;

            for (int seqIndex = 0; seqIndex < sequenceCount; seqIndex++) {
                Sequence forward = sequences.getElementAt(seqIndex);
                String header = (String) forward.getSequenceAnnotationByType("unparsed comment line", 0).getResultAt(0).getValue();

                DoubleList windowScores = new DoubleList();
                double bestScore = Double.NEGATIVE_INFINITY;
                int bestStart = 0;
                String strand = FORWARD_STRAND;

                // NOTE(review): a sequence shorter than the motif yields no windows at all;
                // how the substring extraction below behaves in that case is not verified here.
                int forwardWindows = forward.getLength() - motifLength + 1;
                for (int start = 0; start < forwardWindows; start++) {
                    double score = motif.getLogScoreFor(forward, start);
                    windowScores.add(score);
                    if (score > bestScore) {
                        bestScore = score;
                        bestStart = start;
                    }
                }

                // The reverse strand only wins on a strictly better score, so ties stay on "+".
                Sequence reverse = forward.reverseComplement();
                int reverseWindows = reverse.getLength() - motifLength + 1;
                for (int start = 0; start < reverseWindows; start++) {
                    double score = motif.getLogScoreFor(reverse, start);
                    windowScores.add(score);
                    if (score > bestScore) {
                        bestScore = score;
                        bestStart = start;
                        strand = REVERSE_STRAND;
                    }
                }

                double logSum = Normalisation.getLogSum(windowScores.toArray()) - Math.log(forward.getLength() * 2);

                String match;
                if (FORWARD_STRAND.equals(strand)) {
                    match = forward.toString(bestStart, bestStart + motifLength);
                } else {
                    match = reverse.toString(bestStart, bestStart + motifLength);
                    // Mirror the window start from the reverse complement back onto the input sequence.
                    bestStart = (reverse.getLength() - motifLength) - bestStart;
                }

                output.writeln(String.valueOf(seqIndex + 1) + "\t" + (bestStart + 1) + "\t" + strand + "\t" + bestScore + "\t" + logSum + "\t" + match + "\t" + header);
                progressUpdater.setCurrent(0.2d + ((seqIndex / total) * 0.8d));
            }
        } finally {
            // Release the file handle before the result file is handed on, also on failure.
            output.close();
        }

        progressUpdater.setCurrent(1.0d);
        TextResult predictions = new TextResult("Predictions", "Result", new FileParameter.FileRepresentation(outputFile.getAbsolutePath()), true, "tsv", getToolName(), null, true);
        return new ToolResult("Result of " + getToolName(), "", null, new ResultSet(predictions), toolParameterSet, getToolName(), new Date());
    }

    /**
     * @return the display name of this tool
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getToolName() {
        return "Sequence Scoring";
    }

    /**
     * @return the version string of this tool
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getToolVersion() {
        return "0.1";
    }

    /**
     * @return the short name of this tool, also used to name its parameter set
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getShortName() {
        return "score";
    }

    /**
     * @return a one-line description of this tool
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getDescription() {
        return "Scan input sequences for motif matches";
    }

    /**
     * @return the help text of this tool, including a contact link definition
     */
    @Override // de.jstacs.tools.JstacsTool
    public String getHelpText() {
        return "**" + getToolName() + "** scans a set of input sequences (e.g., sequences under ChIP-seq peaks) for a given motif model (provided as XML as output by \"Methyl SlimDimont\" and provides per sequence information of i) the start position and strand of the best motif match, ii) the corresponding maximum score, iii) the log-sum occupancy score, iv) the matching sequence, and v) the ID (FastaA header) of the sequence.\n\nThe purpose of this tool mainly is to determine per-sequence scores for classification, for instance, distinguishing bound from unbound sequences.\n\nIf you experience problems using " + getToolName() + ", please contact_ us.\n" + "\n" + ".. _contact: mailto:grau@informatik.uni-halle.de";
    }

    /**
     * @return always {@code null}; this tool declares no default result infos
     */
    @Override // de.jstacs.tools.JstacsTool
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }

    /**
     * @param str not used
     * @return always {@code null}; this tool provides no test cases
     */
    @Override // de.jstacs.tools.JstacsTool
    public ToolResult[] getTestCases(String str) {
        return null;
    }

    /** Does nothing; this tool keeps no state between runs. */
    @Override // de.jstacs.tools.JstacsTool
    public void clear() {
    }

    /**
     * @return always {@code null}; this tool lists no references
     */
    @Override // de.jstacs.tools.JstacsTool
    public String[] getReferences() {
        return null;
    }
}
