/*
 * This file is part of Jstacs.
 *
 * Jstacs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Jstacs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jstacs.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.models;

import de.jstacs.NotTrainedException;
import de.jstacs.Storable;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.Sample;
import de.jstacs.data.Sequence;
import de.jstacs.results.NumericalResultSet;
import de.jstacs.results.ResultSet;

/**
 * The interface every probabilistic model has to implement. It declares the methods for training a model,
 * for computing (logarithms of) probabilities of sequences, for emitting artificial data and for querying
 * properties of the model.
 * 
 * @author Andre Gohr, Jan Grau, Jens Keilwagen
 */
public interface Model extends Cloneable, Storable {

	/**
	 * Returns a clone (deep copy) of this <code>Model</code> instance.
	 * 
	 * @return the cloned instance
	 * 
	 * @throws CloneNotSupportedException
	 *             if the instance could not be cloned
	 */
	Model clone() throws CloneNotSupportedException;

	/**
	 * Trains this model on the data given as <code>Sample</code>. <br>
	 * Training should work non-incrementally: after the series <code>train(data1); train(data2)</code> the model
	 * should be fully trained over <code>data2</code> and not over <code>data1+data2</code>. All parameters of the
	 * model were given by the call of the constructor.
	 * 
	 * @param data
	 *            the given sequences as <code>Sample</code>
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the training did not succeed.
	 * 
	 * @see Sample#getElementAt(int)
	 * @see de.jstacs.data.Sample.ElementEnumerator
	 */
	void train( Sample data ) throws Exception;

	/**
	 * Trains this model on the data given as <code>Sample</code> using the specified weights. The weight at
	 * position i belongs to the element at position i, so the array <code>weights</code> should have as many
	 * entries as there are sequences in the sample. (Optionally it is possible to use
	 * <code>weights == null</code> if all weights have the value one.)<br>
	 * Training should work non-incrementally: after the series <code>train(data1); train(data2)</code> the model
	 * should be fully trained over <code>data2</code> and not over <code>data1+data2</code>. All parameters of the
	 * model were given by the call of the constructor.
	 * 
	 * @param data
	 *            the given sequences
	 * @param weights
	 *            the weights of the elements, each weight should be non-negative
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the training did not succeed (e.g. the length of
	 *             <code>weights</code> and the number of sequences in the sample do not match).
	 * 
	 * @see Sample#getElementAt(int)
	 * @see de.jstacs.data.Sample.ElementEnumerator
	 */
	void train( Sample data, double[] weights ) throws Exception;

	/**
	 * Returns the probability of the given sequence given the model. If at least one random variable is
	 * continuous, the value of the density function is returned.
	 * 
	 * <br>
	 * 
	 * The <code>length</code> and <code>alphabets</code> define the type of data that can be modeled and therefore
	 * both have to be checked.
	 * 
	 * @param sequence
	 *            the sequence
	 * 
	 * @return the probability or the value of the density function of the given sequence given the model
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the sequence could not be handled by the model
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 */
	double getProbFor( Sequence sequence ) throws NotTrainedException, Exception;

	/**
	 * Returns the probability of a part of the given sequence given the model. If at least one random variable
	 * is continuous, the value of the density function is returned.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If the length of the sequences whose probability should be returned is fixed (e.g. in an inhomogeneous
	 * model) and the given sequence is longer than this fixed length, <code>startpos</code> gives the start
	 * position within the given sequence. Example: for a fixed length of 12, a given sequence of length 30 and
	 * <code>startpos=15</code>, the probability of the part from position 15 to 26 (inclusive) given the model
	 * should be returned.
	 * 
	 * <br>
	 * 
	 * The <code>length</code> and <code>alphabets</code> define the type of data that can be modeled and therefore
	 * both have to be checked.
	 * 
	 * @param sequence
	 *            the sequence
	 * @param startpos
	 *            the start position
	 * 
	 * @return the probability or the value of the density function of the part of the given sequence given the model
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the sequence could not be handled by the model
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 */
	double getProbFor( Sequence sequence, int startpos ) throws NotTrainedException, Exception;

	/**
	 * Returns the probability of a part of the given sequence given the model. If at least one random variable
	 * is continuous, the value of the density function is returned.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method extends {@link Model#getProbFor(Sequence, int)} to the case that the model is e.g. homogeneous,
	 * so that the length of the sequences whose probability should be returned is not fixed. The end position of
	 * the part of the given sequence is given in addition, and the probability of the part from position
	 * <code>startpos</code> to <code>endpos</code> (inclusive) should be returned.
	 * 
	 * <br>
	 * 
	 * The <code>length</code> and <code>alphabets</code> define the type of data that can be modeled and therefore
	 * both have to be checked.
	 * 
	 * @param sequence
	 *            the sequence
	 * @param startpos
	 *            the start position
	 * @param endpos
	 *            the last position to be taken into account
	 * 
	 * @return the probability or the value of the density function of the part of the given sequence given the model
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the sequence could not be handled (e.g. startpos &gt; endpos, endpos
	 *             &gt; sequence.length, ...) by the model
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 */
	double getProbFor( Sequence sequence, int startpos, int endpos ) throws NotTrainedException, Exception;

	/**
	 * Returns the logarithm of the probability of a part of the given sequence given the model. If at least one
	 * random variable is continuous, the value of the density function is used instead of the probability.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For more details see {@link Model#getProbFor(Sequence, int, int)}
	 * 
	 * @param sequence
	 *            the sequence
	 * @param startpos
	 *            the start position
	 * @param endpos
	 *            the last position to be taken into account
	 * 
	 * @return the logarithm of the probability or of the value of the density function of the part of the given
	 *         sequence given the model
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the sequence could not be handled (e.g. startpos &gt; endpos, endpos
	 *             &gt; sequence.length, ...) by the model
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 * 
	 * @see Model#getProbFor(Sequence, int, int)
	 */
	double getLogProbFor( Sequence sequence, int startpos, int endpos ) throws Exception;

	/**
	 * Returns the logarithm of the probability of a part of the given sequence given the model. If at least one
	 * random variable is continuous, the value of the density function is used instead of the probability.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For more details see {@link Model#getProbFor(Sequence, int)}
	 * 
	 * @param sequence
	 *            the sequence
	 * @param startpos
	 *            the start position
	 * 
	 * @return the logarithm of the probability or of the value of the density function of the part of the given
	 *         sequence given the model
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the sequence could not be handled by the model
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 * 
	 * @see Model#getProbFor(Sequence, int)
	 */
	double getLogProbFor( Sequence sequence, int startpos ) throws Exception;

	/**
	 * Returns the logarithm of the probability of the given sequence given the model. If at least one random
	 * variable is continuous, the value of the density function is used instead of the probability.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For more details see {@link Model#getProbFor(Sequence)}
	 * 
	 * @param sequence
	 *            the sequence
	 * 
	 * @return the logarithm of the probability or of the value of the density function of the given sequence
	 *         given the model
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the sequence could not be handled by the model
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 * 
	 * @see Model#getProbFor(Sequence)
	 */
	double getLogProbFor( Sequence sequence ) throws Exception;

	/**
	 * Computes the logarithm of the probabilities of all sequences in the given sample. The values are stored in
	 * the returned array according to the index of the sequence in the sample.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The probability for any sequence shall be computed independently of all other sequences in the sample, so
	 * the result should be exactly the same as for the method <code>getLogProbFor(Sequence)</code>.
	 * 
	 * @param data
	 *            the sample
	 * 
	 * @return an array containing the logarithm of the probabilities of all sequences of the sample
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see Model#getLogProbFor(Sequence)
	 */
	double[] getLogProbFor( Sample data ) throws Exception;

	/**
	 * Computes the logarithm of the probabilities of all sequences in the given sample and stores them in the
	 * given <code>double</code> array.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The probability for any sequence shall be computed independently of all other sequences in the sample, so
	 * the result should be exactly the same as for the method <code>getLogProbFor(Sequence)</code>.
	 * 
	 * @param data
	 *            the sample
	 * @param res
	 *            the array for the results, has to have length <code>data.getNumberOfElements()</code>
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see Model#getLogProbFor(Sample)
	 */
	void getLogProbFor( Sample data, double[] res ) throws Exception;

	/**
	 * Returns a value that is proportional to the prior. For ML, 1 should be returned.
	 * 
	 * @return a value that is proportional to the prior
	 * 
	 * @throws Exception
	 *             if something went wrong
	 */
	double getPriorTerm() throws Exception;

	/**
	 * Returns a value that is proportional to the log of the prior. For ML, 0 should be returned.
	 * 
	 * @return a value that is proportional to the log of the prior
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see Model#getPriorTerm()
	 */
	double getLogPriorTerm() throws Exception;

	/**
	 * Returns a <code>Sample</code> object containing artificial sequence(s).
	 * 
	 * <br>
	 * <br>
	 * 
	 * For a model with length 0 there are 2 different possibilities to create a sample:
	 * <ol>
	 * <li><code>emitSample( int n, int l )</code> should return a sample with <code>n</code> sequences of length
	 * <code>l</code>.</li>
	 * <li><code>emitSample( int n, int[] l )</code> should return a sample with <code>n</code> sequences which have
	 * a sequence length corresponding to the entry in the array.</li>
	 * </ol>
	 * 
	 * <br>
	 * <br>
	 * 
	 * For a model with length greater than 0 there are 2 different possibilities to create a sample:
	 * <code>emitSample( int n )</code> and <code>emitSample( int n, null )</code> should both return a sample with
	 * <code>n</code> sequences of the length of the model ({@link Model#getLength()}).
	 * 
	 * <br>
	 * <br>
	 * 
	 * The standard implementation throws an Exception.
	 * 
	 * @param numberOfSequences
	 *            the number of sequences that should be contained in the returned sample
	 * @param seqLength
	 *            the length of the sequences for a homogeneous model; for an inhomogeneous model this parameter
	 *            should be <code>null</code> or an array of size 0.
	 * 
	 * @return <code>Sample</code> containing the artificial sequence(s)
	 * 
	 * @throws Exception
	 *             an Exception should be thrown if the emission did not succeed.
	 * @throws NotTrainedException
	 *             a NotTrainedException should be thrown if the model is not trained yet.
	 * 
	 * @see Sample
	 */
	Sample emitSample( int numberOfSequences, int... seqLength ) throws NotTrainedException, Exception;

	/**
	 * Returns the container of alphabets that were used when constructing the model.
	 * 
	 * @return the alphabet
	 */
	AlphabetContainer getAlphabetContainer();

	/**
	 * Should return a <b>short</b> instance name such as iMM(0), BN(2), ...
	 * 
	 * @return a short instance name
	 */
	String getInstanceName();

	/**
	 * Returns the length of the sequences this model can classify. Models that can only classify sequences of a
	 * defined length are e.g. PWM or inhomogeneous Markov models. If the model can classify sequences of
	 * arbitrary length, e.g. homogeneous Markov models, this method returns 0 (zero).
	 * 
	 * @return the length
	 */
	int getLength();

	/**
	 * Returns the maximal used Markov order, if possible.
	 * 
	 * @return maximal used Markov order
	 * 
	 * @throws UnsupportedOperationException
	 *             if the model can't give a proper answer
	 */
	byte getMaximalMarkovOrder() throws UnsupportedOperationException;

	/**
	 * Returns <code>true</code> if the model has been trained successfully, <code>false</code> otherwise.
	 * 
	 * @return <code>true</code> if the model has been trained successfully, <code>false</code> otherwise.
	 */
	boolean isTrained();

	/**
	 * Returns some information characterizing or describing the current instance of the model. This could be e.g.
	 * the number of edges for a Bayesian network or an image showing some representation of the model. The set of
	 * characteristics should always include the XML-representation of the model. The corresponding result type is
	 * <code>ObjectResult</code>
	 * 
	 * @return the characteristics
	 * 
	 * @throws Exception
	 *             an <code>Exception</code> is thrown if some of the characteristics could not be defined
	 * 
	 * @see de.jstacs.results.StorableResult
	 */
	ResultSet getCharacteristics() throws Exception;

	/**
	 * Returns the subset of numerical values that are also returned by {@link Model#getCharacteristics()}.
	 * 
	 * @return the numerical characteristics
	 * 
	 * @throws Exception
	 *             an <code>Exception</code> is thrown if some of the characteristics could not be defined
	 */
	NumericalResultSet getNumericalCharacteristics() throws Exception;

	/**
	 * Should give a simple (textual) representation of the model as <code>String</code>.
	 * 
	 * @return the representation as <code>String</code>
	 */
	String toString();

	/**
	 * Tries to set a new instance of an <code>AlphabetContainer</code> for the current model. <b>This instance has
	 * to be consistent with the underlying instance of an <code>AlphabetContainer</code>.</b>
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method can be very useful to save time.
	 * 
	 * @param abc
	 *            the alphabets
	 * 
	 * @return <code>true</code> if the new instance could be set
	 * 
	 * @see Model#getAlphabetContainer()
	 * @see AlphabetContainer#checkConsistency(AlphabetContainer)
	 */
	boolean setNewAlphabetContainerInstance( AlphabetContainer abc );

}