/*
 * Decompiled with CFR 0.152.
 */
package projects.xanthogenomes.tools;

import de.jstacs.DataType;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.EmptyDataSetException;
import de.jstacs.data.WrongAlphabetException;
import de.jstacs.data.alphabets.Alphabet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.bioJava.BioJavaAdapter;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.data.sequences.annotation.SequenceAnnotationParser;
import de.jstacs.data.sequences.annotation.SimpleSequenceAnnotationParser;
import de.jstacs.io.FileManager;
import de.jstacs.io.SparseStringExtractor;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolParameterSet;
import de.jstacs.tools.ToolResult;
import de.jstacs.utils.Pair;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.biojava.bio.SimpleAnnotation;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.FeatureHolder;
import org.biojava.bio.seq.SequenceIterator;
import org.biojavax.Namespace;
import org.biojavax.RichObjectFactory;
import org.biojavax.SimpleNamespace;
import org.biojavax.SimpleNote;
import org.biojavax.bio.seq.Position;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichLocation;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.RichSequenceIterator;
import org.biojavax.bio.seq.SimplePosition;
import org.biojavax.bio.seq.SimpleRichFeature;
import org.biojavax.bio.seq.SimpleRichLocation;
import projects.xanthogenomes.NHMMer;
import projects.xanthogenomes.Tools;

/**
 * {@link JstacsTool} that scans a genome (FastA or Genbank) for TALEs.
 *
 * <p>The tool loads the input genome, replaces every non-ACGT symbol by {@code A}
 * (remembering the replaced positions), runs {@link NHMMer} with the bundled
 * {@code repeats.hmm}, {@code starts.hmm} and {@code ends.hmm} resources, and reports the
 * predicted regions as GFF, Genbank and FastA (DNA, protein and, if present, pseudo gene)
 * text results.
 *
 * <p>Layout of one row of the region table returned by {@code NHMMer.run}, as it is used in
 * this class (NOTE(review): derived from usage here, confirm against {@code NHMMer}):
 * <ul>
 * <li>{@code [0]} index of the sequence within the data set,</li>
 * <li>{@code [1]}, {@code [2]} start and end of the CDS,</li>
 * <li>{@code [3]} strand, negative values denote the reverse strand,</li>
 * <li>{@code [4]}, {@code [5]} start and end of the region reported as mRNA / complete match,</li>
 * <li>{@code [6]} {@code 1} for a putative pseudo gene.</li>
 * </ul>
 */
public class TALEPredictionTool
implements JstacsTool {
    /** Matches every symbol that is not A, C, G or T (in either case). Compiled once and shared. */
    private static final Pattern NON_ACGT = Pattern.compile("[^ACGTacgt]");

    /**
     * Creates the parameter set of this tool: the genome file (index 0), the optional strain
     * name (index 1) and the "Sensitive" switch (index 2). {@link #run} relies on this order.
     *
     * @return the parameter set, or {@code null} if creating the parameters failed
     */
    @Override
    public ToolParameterSet getToolParameters() {
        try {
            FileParameter input = new FileParameter("Genome", "The input Xanthomonas genome in FastA or Genbank format", "fasta,fa,fas,fna,gb,gbk,genbank", true);
            SimpleParameter strain = new SimpleParameter(DataType.STRING, "Strain", "The name of the strain, will be used for annotated TALEs", false);
            SimpleParameter sens = new SimpleParameter(DataType.BOOLEAN, "Sensitive", "Sensitive scan", true, false);
            return new ToolParameterSet(this.getShortName(), input, strain, sens);
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Runs the TALE prediction on the genome given in {@code parameters}.
     *
     * @param parameters the parameters as created by {@link #getToolParameters()}
     * @param protocol receives progress messages and warnings
     * @param progress progress updater, handed on to {@code NHMMer.run}
     * @param threads not used by this tool
     * @return the tool result holding the GFF, Genbank, DNA FastA and protein FastA outputs
     *         and, if at least one pseudo gene was found, the pseudo gene FastA output
     * @throws Exception with message "Input not in expected format" (and the parser failure
     *         as cause) if the genome cannot be loaded; any exception of the scan or the
     *         output generation is passed on unchanged
     */
    @Override
    public ToolResult run(ToolParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads) throws Exception {
        progress.setLast(1.0);
        progress.setCurrent(0.0);

        // "strain" is appended to result titles, "strainstr" is the prefix of generated TALE names.
        SimpleParameter strainp = (SimpleParameter)parameters.getParameterAt(1);
        String strain = "";
        String strainstr = "";
        if (strainp.isSet()) {
            String name = (String)strainp.getValue();
            if (name != null && name.trim().length() > 0) {
                strain = " (" + name + ")";
                strainstr = name + "-";
            }
        }

        FileParameter fp = (FileParameter)parameters.getParameterAt(0);
        FileParameter.FileRepresentation fr = fp.getFileContents();
        boolean sensitive = (Boolean)parameters.getParameterAt(2).getValue();
        String content = fr.getContent();
        SimpleNamespace ns = new SimpleNamespace("biojava");

        protocol.append("Loading input genome...\n");
        AlphabetContainer con = new AlphabetContainer((Alphabet)new DiscreteAlphabet(true, "A", "C", "G", "T", "N", "W", "S", "M", "K", "R", "Y", "B", "D", "H", "V"));
        boolean fasta = content.trim().startsWith(">");

        DataSet ds = null;
        RichSequence[] seqs = null;
        try {
            if (fasta) {
                BufferedReader br = new BufferedReader(new StringReader(content));
                ds = new DataSet(con, new SparseStringExtractor(br, '>', "", (SequenceAnnotationParser)new SimpleSequenceAnnotationParser()));
                SequenceIterator fastaIt = BioJavaAdapter.dataSetToSequenceIterator(ds, false, true);
                seqs = new RichSequence[ds.getNumberOfElements()];
                for (int i = 0; i < seqs.length; i++) {
                    seqs[i] = (RichSequence)fastaIt.nextSequence();
                }
            } else {
                // The Genbank content is read twice: once to build the Jstacs data set and
                // once to keep the BioJava sequences that are annotated and written out later.
                BufferedReader br = new BufferedReader(new StringReader(content));
                RichSequenceIterator it = RichSequence.IOTools.readGenbankDNA((BufferedReader)br, (Namespace)ns);
                ds = BioJavaAdapter.sequenceIteratorToDataSet((SequenceIterator)it, null, con);
                br.close();
                br = new BufferedReader(new StringReader(content));
                it = RichSequence.IOTools.readGenbankDNA((BufferedReader)br, (Namespace)ns);
                ArrayList<RichSequence> loaded = new ArrayList<RichSequence>();
                while (it.hasNext()) {
                    loaded.add(it.nextRichSequence());
                }
                seqs = loaded.toArray(new RichSequence[0]);
            }
        }
        catch (Exception e) {
            protocol.appendWarning("... loading failed.\n\n");
            protocol.appendThrowable(e);
            // Keep the parser failure as cause so that callers can still diagnose it.
            throw new Exception("Input not in expected format", e);
        }

        Pair<DataSet, ArrayList<Integer>[]> pair = this.preprocess(ds);
        ds = pair.getFirstElement();
        ArrayList<Integer>[] poss = pair.getSecondElement();

        protocol.append("Scanning genome for TALEs...\n");
        int[][] regions = NHMMer.run(
                openResource("projects/xanthogenomes/data/repeats.hmm"),
                openResource("projects/xanthogenomes/data/starts.hmm"),
                openResource("projects/xanthogenomes/data/ends.hmm"),
                ds, progress, sensitive);
        protocol.append("...finished.\n\n");

        // Warn for every prediction whose CDS overlaps a position that was replaced in preprocess().
        for (int i = 0; i < regions.length; i++) {
            int start = regions[i][1];
            int end = regions[i][2];
            boolean contN = false;
            for (int idx : poss[regions[i][0]]) {
                if (idx >= start && idx <= end) {
                    contN = true;
                    break;
                }
            }
            if (contN) {
                protocol.appendWarning(strainstr + "tempTALE" + (i + 1) + " contained \"N\"s in predicted CDS, which have been replaced. Please use with care.\n");
            }
        }

        protocol.append("Writing GFF output.\n");
        String gff = buildGff(ds, regions, strainstr);
        TextResult fres = new TextResult("GFF: TALE predictions" + strain, "TALE predictions in GFF format", new FileParameter.FileRepresentation("", gff), "gff3", "TALE Prediction", null, true);

        protocol.append("Writing Genbank output.\n");
        addGenbankFeatures(seqs, regions, strainstr);
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        for (int i = 0; i < seqs.length; i++) {
            RichSequence.IOTools.writeGenbank((OutputStream)os, (org.biojava.bio.seq.Sequence)seqs[i], (Namespace)ns);
        }
        String cont = os.toString("UTF-8");
        TextResult fres2 = new TextResult("Genbank: TALE predictions" + strain, "Annotated TALEs in Genbank format", new FileParameter.FileRepresentation("", cont), "gb", "TALE Prediction", null, true);

        StringBuilder pseudo = new StringBuilder();
        StringBuilder dna = new StringBuilder();
        StringBuilder prot = new StringBuilder();
        protocol.append("Writing FastA outputs.\n");
        for (int i = 0; i < regions.length; i++) {
            int[] region = regions[i];
            boolean reverse = region[3] < 0;
            boolean isPseudo = region[6] == 1;
            String header = ">" + strainstr + "tempTALE" + (i + 1) + (isPseudo ? " (Pseudo)" : "");

            Sequence cds = ds.getElementAt(region[0]).getSubSequence(region[1], region[2] - region[1]);
            if (reverse) {
                cds = cds.reverseComplement();
            }
            String posString = "[" + region[1] + "-" + region[2] + ":" + region[3] + "]";
            dna.append(header + " " + posString + "\n" + cds + "\n");
            Sequence protein = Tools.Translator.DEFAULT.translate(cds, 0);
            prot.append(header + " " + posString + "\n" + protein + "\n");

            if (isPseudo) {
                // For pseudo genes additionally report the complete match and its
                // translation in all three reading frames.
                Sequence pseudoDNA = ds.getElementAt(region[0]).getSubSequence(region[4], region[5] - region[4]);
                if (reverse) {
                    pseudoDNA = pseudoDNA.reverseComplement();
                }
                posString = "[" + region[4] + "-" + region[5] + ":" + region[3] + "]";
                pseudo.append(header + " " + posString + "\n" + pseudoDNA + "\n");
                for (int frame = 0; frame < 3; frame++) {
                    Sequence pseudoProt = Tools.Translator.DEFAULT.translate(pseudoDNA, frame);
                    pseudo.append(header + " frame: " + frame + " " + posString + "\n" + pseudoProt + "\n");
                }
            }
        }
        TextResult fres3 = new TextResult("TALE DNA sequences" + strain, "The DNA sequences of the TALE CDS", new FileParameter.FileRepresentation("", dna.toString()), "fasta", "TALE Prediction", "fasta/dna", true);
        TextResult fres4 = new TextResult("TALE protein sequences" + strain, "The protein sequences of the TALE CDS", new FileParameter.FileRepresentation("", prot.toString()), "fasta", "TALE Prediction", "fasta/as", true);

        ResultSet set;
        if (pseudo.length() > 0) {
            TextResult fres5 = new TextResult("TALE pseudo gene matches" + strain, "The complete matching sequences of the TALE pseudo genes as DNA and translated in all three reading frames", new FileParameter.FileRepresentation("", pseudo.toString()), "fasta", "TALE Prediction", "fasta/as", true);
            set = new ResultSet(new Result[][]{{fres, fres2, fres3, fres4, fres5}});
        } else {
            set = new ResultSet(new Result[][]{{fres, fres2, fres3, fres4}});
        }

        String file = "";
        if (fr.getFilename() != null) {
            file = " on " + fr.getFilename();
        }
        return new ToolResult("Result of " + this.getToolName() + strain, this.getToolName() + file, null, set, parameters, this.getToolName(), new Date());
    }

    /**
     * Opens a class path resource of this tool as a reader.
     *
     * @param path the resource path, resolved by the class loader of this class
     * @return a reader on the resource
     */
    private static InputStreamReader openResource(String path) {
        return new InputStreamReader(TALEPredictionTool.class.getClassLoader().getResourceAsStream(path));
    }

    /**
     * Determines the identifier used in the first GFF column for a sequence: the trimmed
     * "unparsed comment line" annotation if present, otherwise the "Name" result of the
     * BioJava rich sequence annotation.
     *
     * @param seq the sequence
     * @return the identifier, or {@code null} if neither annotation provides one
     * @throws Exception if the annotation lookup fails (declared broadly because the checked
     *         exceptions of the library calls are not visible in this file)
     */
    private static String sequenceId(Sequence seq) throws Exception {
        SequenceAnnotation comment = seq.getSequenceAnnotationByType("unparsed comment line", 0);
        if (comment != null) {
            return comment.getResultAt(0).getValue().toString().trim();
        }
        SequenceAnnotation rich = seq.getSequenceAnnotationByTypeAndIdentifier("BioJava RichSequence Annotation", "BJRSA");
        if (rich != null) {
            Result res = rich.getResultForName("Name");
            if (res != null) {
                return res.getValue().toString();
            }
        }
        return null;
    }

    /**
     * Renders all predicted regions as GFF text, one mRNA line and one CDS line per region.
     * Start coordinates are written as {@code start + 1}, end coordinates as stored.
     *
     * @param ds the (preprocessed) data set the regions refer to
     * @param regions the region table, see the class comment
     * @param strainstr the prefix of the generated TALE names
     * @return the GFF text
     * @throws Exception if the sequence identifier cannot be determined
     */
    private static String buildGff(DataSet ds, int[][] regions, String strainstr) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < regions.length; i++) {
            int[] region = regions[i];
            // A missing identifier is written as "null", exactly as string concatenation does.
            String id = sequenceId(ds.getElementAt(region[0]));
            String strand = region[3] < 0 ? "-" : "+";
            String taleName = strainstr + "tempTALE" + (i + 1);
            // NOTE(review): GFF3 reserves the attribute tag "ID" (upper case); "Id=" is kept
            // byte-identical here because consumers of this output may depend on it.
            sb.append(id + "\tTALE-prediction\tmRNA\t" + (region[4] + 1) + "\t" + region[5] + "\t.\t" + strand + "\t.\tId=" + taleName + (region[6] == 0 ? "" : "; Note=putative pseudo gene") + "\n");
            sb.append(id + "\tTALE-prediction\tCDS\t" + (region[1] + 1) + "\t" + region[2] + "\t.\t" + strand + "\t.\tParent=" + taleName + "\n");
        }
        return sb.toString();
    }

    /**
     * Adds one CDS feature and one mRNA feature per predicted region to the BioJava sequence
     * the region belongs to. Both features carry a "gene" note with the generated TALE name;
     * the mRNA feature of a pseudo gene additionally carries a "note" note.
     *
     * @param seqs the BioJava sequences, indexed like the data set
     * @param regions the region table, see the class comment
     * @param strainstr the prefix of the generated TALE names
     * @throws Exception if a feature or ontology term cannot be created
     */
    private static void addGenbankFeatures(RichSequence[] seqs, int[][] regions, String strainstr) throws Exception {
        for (int i = 0; i < regions.length; i++) {
            int[] region = regions[i];
            RichSequence target = seqs[region[0]];
            RichLocation.Strand strand = region[3] < 0 ? RichLocation.Strand.NEGATIVE_STRAND : RichLocation.Strand.POSITIVE_STRAND;
            String taleName = strainstr + "tempTALE" + (i + 1);

            RichFeature.Template template = new RichFeature.Template();
            template.location = new SimpleRichLocation((Position)new SimplePosition(region[1] + 1), (Position)new SimplePosition(region[2]), i, strand);
            template.source = "TALE-prediction";
            template.type = "CDS";
            template.annotation = new SimpleAnnotation();
            template.featureRelationshipSet = new HashSet();
            template.rankedCrossRefs = new HashSet();
            SimpleRichFeature feat = new SimpleRichFeature((FeatureHolder)target, (Feature.Template)template);
            feat.getNoteSet().add(new SimpleNote(RichObjectFactory.getDefaultOntology().getOrCreateTerm("gene"), taleName, 1));
            target.getFeatureSet().add(feat);

            // The same template is reused for the mRNA feature; only type and location change.
            template.type = "mRNA";
            template.location = new SimpleRichLocation((Position)new SimplePosition(region[4] + 1), (Position)new SimplePosition(region[5]), i, strand);
            feat = new SimpleRichFeature((FeatureHolder)target, (Feature.Template)template);
            feat.getNoteSet().add(new SimpleNote(RichObjectFactory.getDefaultOntology().getOrCreateTerm("gene"), taleName, 1));
            if (region[6] == 1) {
                feat.getNoteSet().add(new SimpleNote(RichObjectFactory.getDefaultOntology().getOrCreateTerm("note"), "putative pseudo gene", 2));
            }
            target.getFeatureSet().add(feat);
        }
    }

    /**
     * Replaces every symbol that is not A, C, G or T by {@code A} and re-creates each sequence
     * over the DNA alphabet, keeping its annotations.
     *
     * @param ds the loaded data set
     * @return the converted data set and, per sequence, the positions (indexes into the
     *         sequence string) of all replaced symbols
     * @throws IllegalArgumentException passed on from the sequence or data set creation
     * @throws WrongAlphabetException passed on from the sequence or data set creation
     * @throws EmptyDataSetException passed on from the data set creation
     */
    private Pair<DataSet, ArrayList<Integer>[]> preprocess(DataSet ds) throws IllegalArgumentException, WrongAlphabetException, EmptyDataSetException {
        Sequence[] seqs = ds.getAllElements();
        // Generic array creation is not possible in Java; only ArrayList<Integer> instances
        // are ever stored in this array, so the unchecked conversion is safe.
        @SuppressWarnings("unchecked")
        ArrayList<Integer>[] poss = new ArrayList[ds.getNumberOfElements()];
        for (int i = 0; i < seqs.length; i++) {
            poss[i] = new ArrayList<Integer>();
            String seqstr = seqs[i].toString();
            SequenceAnnotation[] anns = seqs[i].getAnnotation();
            Matcher m = NON_ACGT.matcher(seqstr);
            while (m.find()) {
                poss[i].add(m.start());
            }
            // replaceAll resets the matcher, so all matches are replaced although find() was exhausted.
            seqs[i] = Sequence.create(DNAAlphabetContainer.SINGLETON, anns, m.replaceAll("A"), "");
        }
        return new Pair<DataSet, ArrayList<Integer>[]>(new DataSet(ds.getAnnotation(), seqs), poss);
    }

    /** @return the display name of this tool */
    @Override
    public String getToolName() {
        return "TALE Prediction";
    }

    /** @return the short name of this tool, also used as name of its parameter set */
    @Override
    public String getShortName() {
        return "predict";
    }

    /** @return a one-line description of this tool */
    @Override
    public String getDescription() {
        return "Predicts TALE in a genome";
    }

    /**
     * Reads the help text from the bundled resource {@code TALEPredictionTool.txt}.
     *
     * @return the help text, or the empty string if the resource cannot be read
     */
    @Override
    public String getHelpText() {
        try {
            return FileManager.readInputStream(TALEPredictionTool.class.getClassLoader().getResourceAsStream("projects/xanthogenomes/tools/TALEPredictionTool.txt")).toString();
        }
        catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /** @return the version of this tool */
    @Override
    public String getToolVersion() {
        return "1.4.2";
    }

    /** @return always {@code null}; this tool declares no default result infos */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }

    /**
     * @param path not used
     * @return always {@code null}; this tool provides no test cases
     */
    @Override
    public ToolResult[] getTestCases(String path) {
        return null;
    }

    /** Does nothing; this tool keeps no state between runs. */
    @Override
    public void clear() {
    }

    /** @return the BibTeX entry of the AnnoTALE publication */
    @Override
    public String[] getReferences() {
        return new String[]{"@article{grau16annotale,\n\ttitle = {{AnnoTALE}: bioinformatics tools for identification, annotation, and nomenclature of {TALEs} from \\emph{Xanthomonas} genomic sequences},\n\tauthor = {Grau, Jan and Reschke, Maik and Erkes, Annett and Streubel, Jana and Morgan, Richard D. and Wilson, Geoffrey G. and Koebnik, Ralf and Boch, Jens},\n\tjournal = {Scientific Reports},\n\tyear = {2016},\n\tvolume = {6},\n\tpages = {21077},\n\tdoi = {10.1038/srep21077}\n\t}\n"};
    }
}

