/*
 * This file is part of Jstacs.
 *
 * Jstacs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Jstacs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jstacs. If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.models;

import de.jstacs.NotTrainedException;
import de.jstacs.Storable;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.Sample;
import de.jstacs.data.Sequence;
import de.jstacs.results.NumericalResultSet;
import de.jstacs.results.ResultSet;

/**
 * Common contract of all probabilistic models: training on a {@link Sample},
 * computing (log-)probabilities of {@link Sequence}s, emitting artificial
 * data and reporting characteristics of the model.
 * 
 * @author Andre Gohr, Jan Grau, Jens Keilwagen
 */
public interface Model extends Cloneable, Storable {

	/**
	 * Produces a deep copy of this {@link Model} instance.
	 * 
	 * @return the deep copy
	 * 
	 * @throws CloneNotSupportedException
	 *             if the instance could not be cloned
	 */
	Model clone() throws CloneNotSupportedException;

	/**
	 * Trains this {@link Model} on the sequences of the given {@link Sample}.
	 * <br>
	 * Training is expected to be non-incremental: after the calls
	 * {@code train(data1)}; {@code train(data2)} the model should be fully
	 * trained on {@code data2} alone and not on {@code data1+data2}. All
	 * parameters of the model are those that were handed over to the
	 * constructor.
	 * 
	 * @param data
	 *            the training sequences as {@link Sample}
	 * 
	 * @throws Exception
	 *             if the training failed
	 * 
	 * @see Sample#getElementAt(int)
	 * @see de.jstacs.data.Sample.ElementEnumerator
	 */
	void train(Sample data) throws Exception;

	/**
	 * Trains this {@link Model} on the sequences of the given {@link Sample},
	 * weighting each sequence individually. The weight at position i belongs
	 * to the element at position i, so {@code weights} should contain as many
	 * entries as the sample contains sequences. (Optionally
	 * {@code weights == null} can be used if all weights have the value
	 * one.)<br>
	 * Training is expected to be non-incremental: after the calls
	 * {@code train(data1)}; {@code train(data2)} the model should be fully
	 * trained on {@code data2} alone and not on {@code data1+data2}. All
	 * parameters of the model are those that were handed over to the
	 * constructor.
	 * 
	 * @param data
	 *            the training sequences as {@link Sample}
	 * @param weights
	 *            the weights of the elements; each weight should be
	 *            non-negative
	 * 
	 * @throws Exception
	 *             if the training failed (e.g. the dimension of
	 *             {@code weights} does not match the number of sequences in
	 *             the sample)
	 * 
	 * @see Sample#getElementAt(int)
	 * @see de.jstacs.data.Sample.ElementEnumerator
	 */
	void train(Sample data, double[] weights) throws Exception;

	/**
	 * Computes the probability of the given sequence given the model. If at
	 * least one random variable is continuous, the value of the density
	 * function is returned instead.
	 * 
	 * <br>
	 * 
	 * The {@code length} and the {@code alphabets} define the type of data
	 * that can be modeled and therefore both have to be checked.
	 * 
	 * @param sequence
	 *            the sequence whose probability/density value is requested
	 * 
	 * @return the probability or the value of the density function of the
	 *         given sequence given the model
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the sequence could not be handled by the model
	 */
	double getProbFor(Sequence sequence) throws NotTrainedException, Exception;

	/**
	 * Computes the probability of (a part of) the given sequence given the
	 * model. If at least one random variable is continuous, the value of the
	 * density function is returned instead.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If the length of the sequences whose probability should be returned is
	 * fixed (e.g. in an inhomogeneous model) and the given sequence is longer
	 * than this fixed length, {@code startpos} gives the start position
	 * within the given sequence. Example: for a fixed length of 12, a given
	 * sequence of length 30 and {@code startpos}=15, the probability of the
	 * part from position 15 to 26 (inclusive) given the model should be
	 * returned.
	 * 
	 * <br>
	 * 
	 * The {@code length} and the {@code alphabets} define the type of data
	 * that can be modeled and therefore both have to be checked.
	 * 
	 * @param sequence
	 *            the given sequence
	 * @param startpos
	 *            the start position within the given sequence
	 * 
	 * @return the probability or the value of the density function of (the
	 *         part of) the given sequence given the model
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the sequence could not be handled by the model
	 */
	double getProbFor(Sequence sequence, int startpos) throws NotTrainedException, Exception;

	/**
	 * Computes the probability of (a part of) the given sequence given the
	 * model. If at least one random variable is continuous, the value of the
	 * density function is returned instead.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method extends {@link #getProbFor(Sequence, int)} to models (e.g.
	 * homogeneous ones) for which the length of the sequences whose
	 * probability should be returned is not fixed. For that purpose the end
	 * position of the part of the given sequence is passed as well, and the
	 * probability of the part from position {@code startpos} to
	 * {@code endpos} (inclusive) should be returned.
	 * 
	 * <br>
	 * 
	 * The {@code length} and the {@code alphabets} define the type of data
	 * that can be modeled and therefore both have to be checked.
	 * 
	 * @param sequence
	 *            the given sequence
	 * @param startpos
	 *            the start position within the given sequence
	 * @param endpos
	 *            the last position to be taken into account
	 * 
	 * @return the probability or the value of the density function of (the
	 *         part of) the given sequence given the model
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the sequence could not be handled by the model (e.g.
	 *             {@code startpos > endpos},
	 *             {@code endpos > sequence.length}, ...)
	 */
	double getProbFor(Sequence sequence, int startpos, int endpos) throws NotTrainedException, Exception;

	/**
	 * Computes the logarithm of the probability of (a part of) the given
	 * sequence given the model. If at least one random variable is
	 * continuous, the value of the density function is used instead of the
	 * probability.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For more details see {@link Model#getProbFor(Sequence, int, int)}.
	 * 
	 * @param sequence
	 *            the given sequence
	 * @param startpos
	 *            the start position within the given sequence
	 * @param endpos
	 *            the last position to be taken into account
	 * 
	 * @return the logarithm of the probability or the value of the density
	 *         function of (the part of) the given sequence given the model
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the sequence could not be handled by the model (e.g.
	 *             {@code startpos > endpos},
	 *             {@code endpos > sequence.length}, ...)
	 * 
	 * @see Model#getProbFor(Sequence, int, int)
	 */
	double getLogProbFor(Sequence sequence, int startpos, int endpos) throws Exception;

	/**
	 * Computes the logarithm of the probability of (a part of) the given
	 * sequence given the model. If at least one random variable is
	 * continuous, the value of the density function is used instead of the
	 * probability.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For more details see {@link Model#getProbFor(Sequence, int)}.
	 * 
	 * @param sequence
	 *            the given sequence
	 * @param startpos
	 *            the start position within the given sequence
	 * 
	 * @return the logarithm of the probability or the value of the density
	 *         function of (the part of) the given sequence given the model
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the sequence could not be handled by the model
	 * 
	 * @see Model#getProbFor(Sequence, int)
	 */
	double getLogProbFor(Sequence sequence, int startpos) throws Exception;

	/**
	 * Computes the logarithm of the probability of the given sequence given
	 * the model. If at least one random variable is continuous, the value of
	 * the density function is used instead of the probability.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For more details see {@link Model#getProbFor(Sequence)}.
	 * 
	 * @param sequence
	 *            the sequence for which the logarithm of the probability/the
	 *            value of the density function is requested
	 * 
	 * @return the logarithm of the probability or the value of the density
	 *         function of the given sequence given the model
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the sequence could not be handled by the model
	 * 
	 * @see Model#getProbFor(Sequence)
	 */
	double getLogProbFor(Sequence sequence) throws Exception;

	/**
	 * Computes the logarithm of the probability of every sequence in the
	 * given sample. The returned array is ordered by the index of the
	 * respective sequence in the sample.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The probability of each sequence shall be computed independently of
	 * all other sequences in the sample, so each entry should be exactly the
	 * value that {@link #getLogProbFor(Sequence)} yields for that sequence.
	 * 
	 * @param data
	 *            the sample of sequences
	 * 
	 * @return an array containing the logarithm of the probabilities of all
	 *         sequences of the sample
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see Model#getLogProbFor(Sequence)
	 */
	double[] getLogProbFor(Sample data) throws Exception;

	/**
	 * Computes the logarithm of the probability of every sequence in the
	 * given sample and writes the values into the given
	 * {@code double}-array.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The probability of each sequence shall be computed independently of
	 * all other sequences in the sample, so each entry should be exactly the
	 * value that {@link #getLogProbFor(Sequence)} yields for that sequence.
	 * 
	 * @param data
	 *            the sample of sequences
	 * @param res
	 *            the array receiving the results; it has to have length
	 *            {@code data.getNumberOfElements()} (the number of sequences
	 *            in the sample)
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see Model#getLogProbFor(Sample)
	 */
	void getLogProbFor(Sample data, double[] res) throws Exception;

	/**
	 * Returns a value that is proportional to the prior. For ML, 1 should be
	 * returned.
	 * 
	 * @return a value that is proportional to the prior
	 * 
	 * @throws Exception
	 *             if something went wrong
	 */
	double getPriorTerm() throws Exception;

	/**
	 * Returns a value that is proportional to the logarithm of the prior. For
	 * ML, 0 should be returned.
	 * 
	 * @return a value that is proportional to the logarithm of the prior
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see Model#getPriorTerm()
	 */
	double getLogPriorTerm() throws Exception;

	/**
	 * Creates a {@link Sample} of artificial sequence(s).
	 * 
	 * <br>
	 * <br>
	 * 
	 * For a model with length 0 (homogeneous models) there are two ways to
	 * create a sample:
	 * <ol>
	 * <li> {@code emitSample( int n, int l )} should return a sample with
	 * {@code n} sequences of length {@code l}.
	 * <li> {@code emitSample( int n, int[] l )} should return a sample with
	 * {@code n} sequences whose lengths correspond to the entries of the
	 * given array {@code l}.
	 * </ol>
	 * 
	 * <br>
	 * 
	 * For a model with length greater than 0 (inhomogeneous models) there
	 * are two ways as well:<br>
	 * {@code emitSample( int n )} and {@code emitSample( int n, null )}
	 * should both return a sample with {@code n} sequences of the length of
	 * the model ({@link Model#getLength()}).
	 * 
	 * <br>
	 * <br>
	 * 
	 * The standard implementation throws an {@link Exception}.
	 * 
	 * @param numberOfSequences
	 *            the number of sequences the returned sample should contain
	 * @param seqLength
	 *            the length of the sequences for a homogeneous model; for an
	 *            inhomogeneous model this parameter should be {@code null}
	 *            or an array of size 0
	 * 
	 * @return a {@link Sample} containing the artificial sequence(s)
	 * 
	 * @throws NotTrainedException
	 *             if the model is not trained yet
	 * @throws Exception
	 *             if the emission did not succeed
	 * 
	 * @see Sample
	 */
	Sample emitSample(int numberOfSequences, int... seqLength) throws NotTrainedException, Exception;

	/**
	 * Returns the container of alphabets that was used when constructing the
	 * model.
	 * 
	 * @return the container of alphabets that was used when constructing the
	 *         model
	 */
	AlphabetContainer getAlphabetContainer();

	/**
	 * Should return a <b>short</b> name of the instance, such as iMM(0),
	 * BN(2), ...
	 * 
	 * @return a short instance name
	 */
	String getInstanceName();

	/**
	 * Returns the length of the sequences this model can classify. Examples
	 * of models restricted to sequences of a defined length are PWMs or
	 * inhomogeneous Markov models. A model that can classify sequences of
	 * arbitrary length, e.g. a homogeneous Markov model, returns 0 (zero).
	 * 
	 * @return the length of sequences the model can classify, or 0 if the
	 *         length is arbitrary
	 */
	int getLength();

	/**
	 * Returns the maximal Markov order used by the model, if possible.
	 * 
	 * @return the maximal used Markov order
	 * 
	 * @throws UnsupportedOperationException
	 *             if the model cannot give a proper answer
	 */
	byte getMaximalMarkovOrder() throws UnsupportedOperationException;

	/**
	 * Tells whether the model has been trained successfully.
	 * 
	 * @return {@code true} if the model has been trained successfully,
	 *         {@code false} otherwise
	 */
	boolean isTrained();

	/**
	 * Returns information characterizing or describing the current instance
	 * of the model, e.g. the number of edges of a Bayesian network or an
	 * image showing some representation of the model. The set of
	 * characteristics should always include the XML-representation of the
	 * model; the corresponding result type is
	 * {@link de.jstacs.results.StorableResult}.
	 * 
	 * @return the characteristics of the current instance of the model
	 * 
	 * @throws Exception
	 *             if some of the characteristics could not be defined
	 * 
	 * @see de.jstacs.results.StorableResult
	 */
	ResultSet getCharacteristics() throws Exception;

	/**
	 * Returns the subset of numerical values that are also returned by
	 * {@link #getCharacteristics()}.
	 * 
	 * @return the numerical characteristics of the current instance of the
	 *         model
	 * 
	 * @throws Exception
	 *             if some of the characteristics could not be defined
	 */
	NumericalResultSet getNumericalCharacteristics() throws Exception;

	/**
	 * Should give a simple textual representation of the model as
	 * {@link String}.
	 * 
	 * @return the representation as {@link String}
	 */
	String toString();

	/**
	 * Tries to set a new instance of an {@link AlphabetContainer} for the
	 * current model. <b>This instance has to be consistent with the
	 * underlying instance of an {@link AlphabetContainer}.</b>
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method can be very useful to save time.
	 * 
	 * @param abc
	 *            the alphabets in an {@link AlphabetContainer}
	 * 
	 * @return {@code true} if the new instance could be set
	 * 
	 * @see Model#getAlphabetContainer()
	 * @see AlphabetContainer#checkConsistency(AlphabetContainer)
	 */
	boolean setNewAlphabetContainerInstance(AlphabetContainer abc);

}