
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.Sample;
import de.jstacs.data.Sequence;
import de.jstacs.data.alphabets.DNAAlphabet;
import de.jstacs.io.StringExtractor;
import de.jstacs.models.AbstractModel;
import de.jstacs.models.discrete.homogeneous.HomogeneousMM;
import de.jstacs.models.discrete.homogeneous.parameters.HomMMParameterSet;
import de.jstacs.models.discrete.inhomogeneous.BayesianNetworkModel;
import de.jstacs.models.discrete.inhomogeneous.StructureLearner.LearningType;
import de.jstacs.models.discrete.inhomogeneous.StructureLearner.ModelType;
import de.jstacs.models.discrete.inhomogeneous.parameters.BayesianNetworkModelParameterSet;
import de.jstacs.models.mixture.StrandModel;
import de.jstacs.models.mixture.AbstractMixtureModel.Parameterization;
import de.jstacs.models.mixture.motif.SingleHiddenMotifMixture;
import de.jstacs.models.mixture.motif.positionprior.GaussianLikePositionPrior;
import de.jstacs.models.mixture.motif.positionprior.PositionPrior;
import de.jstacs.motifDiscovery.MotifDiscoverer.KindOfProfile;

/**
 * Command line entry point of the MotifAdjuster: it trains a mixture of a motif model and a homogeneous
 * background model on a set of equally long sequences and prints, for each sequence of the input file,
 * whether a binding site is predicted and, if so, its shift, strand and adjusted sequence.
 * 
 * @author Jens Keilwagen
 */
public class MotifAdjuster
{
	/**
	 * Number of command line arguments that {@link #main(String[])} reads.
	 */
	private static final int NUMBER_OF_ARGUMENTS = 9;

	/**
	 * Reads the data set, trains the mixture model and prints a tab-separated result table to standard out.
	 * The usage line is always printed first. Any exception raised while working is caught and its stack
	 * trace is printed.
	 * 
	 * @param args
	 *            <ol>
	 *            <li> file: the location of the data set (String)
	 *            <li> ignoreChar: char for comment lines (e.g. for a FastA-file '&gt;') (char)
	 *            <li> length: the motif length (int)
	 *            <li> fgOrder: the order of the inhomogeneous Markov model that is used for the motif; 0 yields a
	 *            PWM (byte)
	 *            <li> ess: the equivalent sample size that is used for the mixture model (double &gt;= 0)
	 *            <li> bothStrands: use both strands (boolean)
	 *            <li> output: output of the EM (boolean)
	 *            <li> sigma: the sigma of the truncated discrete Gaussian distribution (double&gt;0)
	 *            <li> p(no motif): the probability for finding no motif (0&lt;=double&lt;1)
	 *            </ol>
	 */
	public static void main( String[] args )
	{
		System.out.println( "java ... MotifAdjuster <file> <ignoreChar> <length> <fgOrder> <ess> <bothStrands> <output> <sigma> <p(no motif)>" );
		if( args.length < NUMBER_OF_ARGUMENTS )
		{
			// without this check the first array access below fails with a bare stack trace
			System.err.println( "Expected " + NUMBER_OF_ARGUMENTS + " arguments but got " + args.length + "." );
			return;
		}
		try
		{
			AlphabetContainer con = new AlphabetContainer( new DNAAlphabet() );

			char ignore = args[1].charAt( 0 );
			Sample s = new Sample( con, new StringExtractor( new File( args[0] ), 200, ignore ) );

			if( s.getElementLength() == 0 )
			{
				System.out.println( "All sequences have to have the same length." );
				return;
			}

			System.out.println( s.getAnnotation() + ": " + s.getNumberOfElements() + " sequences of length "
					+ s.getElementLength() );

			int sl = Integer.parseInt( args[2] );
			// max is the start position at which a motif of length sl is centered in a sequence of length l
			int l = s.getElementLength(), max = (l - sl) / 2;

			byte fgOrder = Byte.parseByte( args[3] );
			double ess = Double.parseDouble( args[4] );
			boolean bothStrands = Boolean.parseBoolean( args[5] );
			double sigma = Double.parseDouble( args[7] );
			// the motif model gets the (1 - p(no motif)) share of the ess, split evenly over both strands
			double pBg = Double.parseDouble( args[8] ), pwmESS = (1d-pBg)*ess, strandHyper=pwmESS/2d;

			BayesianNetworkModelParameterSet p = new BayesianNetworkModelParameterSet( con, sl, pwmESS, "foreground model", ModelType.IMM, fgOrder, LearningType.ML_OR_MAP );
			AbstractModel motifModel;
			if( bothStrands )
			{
				motifModel = new StrandModel( new BayesianNetworkModel( p ), 1, new double[]{ strandHyper, strandHyper }, 1, 1E-6, Parameterization.LAMBDA );
				((StrandModel) motifModel).setOutputStream( null );
			}
			else
			{
				motifModel = new BayesianNetworkModel( p );
			}

			AbstractModel backgroundModel = new HomogeneousMM( new HomMMParameterSet( con, (s.getElementLength() - (1d-pBg)*sl) * ess, null, (byte) 0 ) );

			// here you can alter the prior
			PositionPrior pr = new GaussianLikePositionPrior( l, max, sigma );
			System.out.println( "prior prop. to: exp( -(" + max + " - l)^2/(2 * " + sigma + "^2) )" );

			double stop = 1E-6;
			SingleHiddenMotifMixture em;

			em = new SingleHiddenMotifMixture( motifModel, backgroundModel, false, 10, 1d-pBg, pr, 1d, stop, Parameterization.LAMBDA );
			//em = new SingleHiddenMotifMixture( motifModel, backgroundModel, false, 10, new double[]{4,1}, pr, 1d, stop, Parameterization.LAMBDA );

			if( Boolean.parseBoolean( args[6] ) )
			{
				em.setOutputStream( System.out );
			}
			else
			{
				em.setOutputStream( null );
			}
			em.train( s );

			System.out.println();
			System.out.println( "models: " );

			System.out.println( em );
			motifModel = (AbstractModel) em.getModel( 0 );

			StrandModel strand = null;
			if( bothStrands )
			{
				strand = (StrandModel) motifModel;
			}
			System.out.println( "results for " + s.getNumberOfElements() + " sites" );
			System.out.println();
			System.out.println( "\"annotation\"\tsequence\tcontains BS\tpredicted shift\tpredicted strand\tadjusted BS" );

			printPredictions( args[0], ignore, con, em, strand, sl, max );
		}
		catch( Exception e )
		{
			e.printStackTrace();
		}
	}

	/**
	 * Re-reads the input file line by line and prints one result row per sequence line.
	 * 
	 * @param fileName
	 *            the location of the data set
	 * @param ignore
	 *            the char that marks comment lines; the last comment line before a sequence is used as its annotation
	 * @param con
	 *            the alphabet used to create the sequences
	 * @param em
	 *            the trained mixture model
	 * @param strand
	 *            the trained strand model, or <code>null</code> if only the forward strand is used
	 * @param sl
	 *            the motif length
	 * @param max
	 *            the start position that corresponds to a shift of 0
	 * 
	 * @throws Exception
	 *             if the file could not be read or a model could not be evaluated
	 */
	private static void printPredictions( String fileName, char ignore, AlphabetContainer con, SingleHiddenMotifMixture em,
			StrandModel strand, int sl, int max ) throws Exception
	{
		BufferedReader reader = new BufferedReader( new FileReader( fileName ) );
		try
		{
			// " " is the placeholder for "no annotation"; its first char is stripped like the comment char
			String annot = " ", line;
			while( (line = reader.readLine()) != null )
			{
				if( line.trim().length() == 0 )
				{
					// blank lines would fail on charAt( 0 ); skipping assumes the StringExtractor
					// did not turn them into sequences either -- TODO confirm
					continue;
				}
				if( line.charAt( 0 ) == ignore )
				{
					// remember the comment; a comment at the end of the file is simply dropped
					annot = line;
					continue;
				}

				Sequence seq = Sequence.create( con, line );
				System.out.print( "\"" + annot.substring( 1 ).trim() + "\"\t" + seq + "\t" );
				annot = " ";

				if( em.getIndexOfMaximalComponentFor( seq ) == 1 )
				{
					// component 1 is reported as "contains no binding site"
					System.out.print( "0" );
				}
				else
				{
					double[] prof = em.getProfileOfScoresFor( 0, 0, seq, 0, KindOfProfile.UNNORMALIZED_CONDITIONAL );
					int start = getIndexOfMax( prof );
					System.out.print( "1\t" + (start - max) + "\t" );
					seq = seq.getSubSequence( start, sl );

					if( strand != null )
					{
						if( strand.getIndexOfMaximalComponentFor( seq ) == 0 )
						{
							System.out.print( "forward " );
						}
						else
						{
							System.out.print( "rev. compl." );
							seq = seq.reverseComplement();
						}
					}
					else
					{
						System.out.print( "forward " );
					}
					System.out.print( "\t" + seq );
				}
				System.out.println();
			}
		}
		finally
		{
			// the original never closed the reader
			reader.close();
		}
	}

	/**
	 * Returns the index of the first maximal entry of the array.
	 * 
	 * @param array
	 *            the values to scan
	 * 
	 * @return the index of the first maximum; 0 if the array is empty
	 */
	private static int getIndexOfMax( double[] array ) {
		int idx = 0, i = 1;
		for( ; i < array.length; i++ ) {
			if( array[idx] < array[i] ) {
				idx = i;
			}
		}
		return idx;
	}
}