package de.jstacs.models.mixture.motif;

import java.util.Arrays;

import javax.naming.OperationNotSupportedException;

import de.jstacs.NonParsableException;
import de.jstacs.WrongAlphabetException;
import de.jstacs.data.Sample;
import de.jstacs.data.Sequence;
import de.jstacs.io.XMLParser;
import de.jstacs.models.Model;
import de.jstacs.models.mixture.AbstractMixtureModel;
import de.jstacs.models.mixture.gibbssampling.BurnInTest;
import de.jstacs.models.mixture.motif.positionprior.PositionPrior;
import de.jstacs.motifDiscovery.MotifDiscoverer;

/**
 * This is the main class that every generative motif discoverer should implement. It implements the mixture of
 * containing 0 or 1 motif in a sequence.
 * 
 * @author Jens Keilwagen
 */
public abstract class HiddenMotifMixture extends AbstractMixtureModel implements MotifDiscoverer
{
	/**
	 * The prior for the positions.
	 */
	protected PositionPrior posPrior;

	/**
	 * A switch that enables to train only the motif model.
	 */
	protected boolean trainOnlyMotifModel;

	/**
	 * The order of the background model.
	 */
	protected byte bgMaxMarkovOrder;

	private static final boolean[] getOptimizeArray( int l, boolean trainOnlyMotifModel )
	{
		boolean[] res = new boolean[l];
		if( trainOnlyMotifModel )
		{
			Arrays.fill( res, false );
			res[0] = true;
		}
		else
		{
			Arrays.fill( res, true );
		}
		return res;		
	}
	
	/**
	 * Creates a new AbstractMixtureModel. This constructor can be used for any algorithm since it takes all
	 * necessary values as parameters.
	 * 
	 * @param models
	 *            the single models building the AbstractMixtureModel, if the model is trained using
	 *            {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#GIBBS_SAMPLING} the models that will be adjusted have to implement
	 *            {@link de.jstacs.models.mixture.gibbssampling.GibbsSamplingComponent}.
	 *            The models that are used for the flanking sequences have to be able to score sequences of arbitrary length.
	 * @param starts
	 *            the number of times the algorithm will be started in the train method, at least 1
	 * @param estimateComponentProbs
	 *            the switch for estimating the component probabilities in the algorithm or to hold them fixed;
	 *            if the component parameters are fixed, the values of <code>weights</code> will be used, otherwise
	 *            the <code>componentHyperParams</code> will be incorporated in the adjustment
	 * @param componentHyperParams
	 *            the hyperparameters for the component assignment prior,
	 *            <ul>
	 *            <li>will only be used if <code>estimateComponentProbs == true</code>
	 *            <li>the array has to be <code>null</code> or has to have length <code>dimension</code>
	 *            <li><code>null</code> or an array with all values zero (0) than ML
	 *            <li>otherwise (all values positive) a prior is used (MAP, MP, ...)
	 *            <li>depends on the <code>parameterization</code>
	 *            </ul>
	 * @param weights
	 *            <code>null</code> or the weights for the components (than <code>weights.length == dimension</code>)
	 * @param posPrior
	 * 			  this object determine the positional distribution that shall be used            
	 * @param trainOnlyMotifModel
	 *            a switch whether to train only the motif model
	 * @param algorithm
	 *            either {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#EM}
	 *            or {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#GIBBS_SAMPLING}
	 * @param alpha
	 *            only for {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#EM}<br>
	 *            the positive parameter for the Dirichlet which is used when you invoke <code>train</code> to initialize
	 *            the gammas. It is recommended to use <code>alpha = 1</code> (uniform distribution on a simplex).
	 * @param eps
	 *            only for {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#EM}<br>
	 *            the non-negative threshold for stopping the EM-algorithm
	 * @param parametrization
	 *            only for {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#EM}<br>
	 *            the type of the component probability parameterization;
	 *            <ul>
	 *            <li>{@link de.jstacs.models.mixture.AbstractMixtureModel.Parameterization#THETA}
	 *            	or {@link de.jstacs.models.mixture.AbstractMixtureModel.Parameterization#LAMBDA}
	 *            <li>the parameterization of in a component is determined by the component model
	 *            <li>it is recommended to use the same parameterization for the components and the component assignment probabilities
	 *            <li>it is recommended to use <code>LAMBDA</code>
	 *            <ul>
	 * @param initialIteration
	 *            only for {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#GIBBS_SAMPLING}<br>
	 *            the positive length of the initial sampling phase (at least 1, at most <code>stationaryIteration/starts</code>)
	 * @param stationaryIteration
	 *            only for {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#GIBBS_SAMPLING}<br>
	 *            the positive length of the stationary phase (at least 1) (summed over all starts), i.e. the number of parameter sets
	 *            that is used in approximation
	 * @param burnInTest
	 *            only for {@link de.jstacs.models.mixture.AbstractMixtureModel.Algorithm#GIBBS_SAMPLING}<br>
	 *            the test that will be used to determine the length of the burn-in phase
	 *            
	 * @throws IllegalArgumentException
	 *             if
	 *             <ul>
	 *             <li>the models are not able to score the sequence of the corresponding length
	 *             <li><code>weights != null && weights.length != 2</code>
	 *             <li><code>weights != null</code> and it exists an <code>i</code> where <code>weights[i] &lt; 0</code>
	 *             <li><code>starts &lt; 1</code>
	 *             <li>componentHyperParams are not correct
	 *             <li>the algorithm specific parameters are not correct
	 *             </ul>
	 * @throws WrongAlphabetException
	 *             if not all <code>models</code> work on the same simple alphabet
	 * @throws CloneNotSupportedException
	 *             if the <code>models</code> can not be cloned
	 */
	protected HiddenMotifMixture( Model[] models, int starts, boolean estimateComponentProbs, double[] componentHyperParams, double[] weights,
			PositionPrior posPrior, boolean trainOnlyMotifModel, Algorithm algorithm,
			double alpha, double eps, Parameterization parametrization, //EM parameters
			int initialIteration, int stationaryIteration, BurnInTest burnInTest ) //GIBBS_SAMPLING parameters
			throws CloneNotSupportedException, IllegalArgumentException, WrongAlphabetException
	{
		super( posPrior.getLength(), models, getOptimizeArray( models.length, trainOnlyMotifModel ), 2, starts,
				estimateComponentProbs, componentHyperParams, weights, algorithm, alpha, eps, parametrization, initialIteration, stationaryIteration, burnInTest );
		if( !alphabets.isSimple() )
		{
			throw new WrongAlphabetException( "The AlphabetContainer has to be simple." );
		}
		this.posPrior = posPrior.clone();
		this.trainOnlyMotifModel = trainOnlyMotifModel;
		this.posPrior.setMotifLength( getMotifLength(0) );
		bgMaxMarkovOrder = model[1].getMaximalMarkovOrder();
	}

	/**
	 * The standard constructor for the interface {@link de.jstacs.Storable}.
	 * 
	 * @param xml
	 *            the StringBuffer containing the model
	 * 
	 * @throws NonParsableException
	 *             if the StringBuffer can not be parsed
	 */
	protected HiddenMotifMixture( StringBuffer xml ) throws NonParsableException
	{
		super( xml );
	}

	public HiddenMotifMixture clone() throws CloneNotSupportedException
	{
		HiddenMotifMixture clone = (HiddenMotifMixture) super.clone();
		clone.posPrior = posPrior.clone();
		return clone;
	}

	protected StringBuffer getFurtherInformation()
	{
		StringBuffer erg = new StringBuffer( 1000 );
		XMLParser.appendStorableWithTags( erg, posPrior, "posPrior" );
		return erg;
	}

	protected void extractFurtherInformation( StringBuffer xml ) throws NonParsableException
	{
		posPrior = (PositionPrior) XMLParser.extractStorableForTag( xml, "posPrior" );
		
		posPrior.setMotifLength( getMotifLength(0) );
		bgMaxMarkovOrder = model[1].getMaximalMarkovOrder();
		
		int i = 1;
		while( i < model.length && !optimizeModel[i] )
		{
		  i++;
		}
		trainOnlyMotifModel = i == model.length;
	}

	public void train( Sample data, double[] weights ) throws Exception
	{
		if( data.getMinimalElementLength() < getMinimalSequenceLength() )
		{
			throw new IllegalArgumentException(
					"The sample contains sequence that are not allowed in this MotifDiscoverer. The minimal length is "
							+ getMinimalSequenceLength() + "." );
		}
		super.train( data, weights );
	}

	/**
	 * This method trains the bg-model. This can be useful if the bg-model is not trained while the EM-algorithm.
	 * 
	 * @param data
	 *            the sample
	 * @param weights
	 *            the weights
	 * 
	 * @throws Exception
	 *             if something went wrong
	 */
	public final void trainBgModel( Sample data, double[] weights ) throws Exception
	{
		model[1].train( data, weights );
	}

	protected void getNewParameters( int iteration, double[][] seqWeights, double[] w ) throws Exception
	{
		getNewParametersForModel( 0, iteration, 0, seqWeights[0] );
		if( !trainOnlyMotifModel )
		{
			for( int i = 1; i < model.length; i++ )
			{
				getNewParametersForModel( i, iteration, i, seqWeights[i] );
			}
		}
		
		getNewComponentProbs( w );
	}

	protected void checkLength( int index, int l )
	{
		if( index == 0 )
		{
			if( length != 0 && length < l )
			{
				throw new IllegalArgumentException(
						"The motif length is bigger than the length of the sequences the should be modeled." );
			}
		}
		else
		{
			if( l != 0 )
			{
				throw new IllegalArgumentException(
						"All models accept the motif model have to be homogeneous. Violated at position " + index + "." );
			}
		}
	}
	
	/**
	 * Returns the minimal length a sequence respectively a sample has to have.
	 * 
	 * @return the minimal length a sequence respectively a sample has to have
	 */
	public abstract int getMinimalSequenceLength();

	public String getInstanceName()
	{
		StringBuffer erg = new StringBuffer( getClass().getSimpleName() + "(" );
		erg.append( model[0].getInstanceName() );
		for( int i = 1; i < model.length; i++ )
		{
			erg.append( ", " );
			erg.append( model[i].getInstanceName() );
		}
		erg.append( "; " + posPrior.getInstanceName() );
		if( !estimateComponentProbs )
		{
			erg.append( "; " + Arrays.toString( weights ) );
		}
		erg.append( ")" );
		return erg.toString();
	}

	public double getLogPriorTerm() throws Exception
	{
		double erg = 0;
		for( int counter = 0, l = trainOnlyMotifModel ? 1 : model.length; counter < l; counter++ )
		{
			erg += model[counter].getLogPriorTerm();
		}
		return erg + getLogPriorTermForComponentProbs();
	}

	public String toString()
	{
		StringBuffer sb = new StringBuffer( 100000 );
		sb.append( weights[0] + "\tmotif\n" );
		sb.append( weights[1] + "\tno motif\n\n" );
		sb.append( "position prior: " + posPrior.getInstanceName() + "\n\n" );
		for( int i = 0; i < dimension; i++ )
		{
			sb.append( model[i].getInstanceName() + "\n" + model[i].toString() + "\n" );
		}
		return sb.toString();
	}
	
	/**
	 * Standard implementation throwing an OperationNotSupportedException.
	 */
	protected Sequence[] emitSampleUsingCurrentParameterSet( int n, int... lengths ) throws Exception
	{
		throw new OperationNotSupportedException();
	}
}
