/*
 * This file is part of Jstacs.
 * 
 * Jstacs is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 * 
 * Jstacs is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * Jstacs. If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.classifier;

import java.util.LinkedList;

import de.jstacs.NonParsableException;
import de.jstacs.Storable;
import de.jstacs.classifier.MeasureParameters.Measure;
import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.Sample;
import de.jstacs.data.Sequence;
import de.jstacs.data.Sample.ElementEnumerator;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.ParameterException;
import de.jstacs.results.CategoricalResult;
import de.jstacs.results.NumericalResult;
import de.jstacs.results.NumericalResultSet;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.StorableResult;

/**
 * The super class for any classifier.
 * 
 * <br>
 * <br>
 * 
 * <a name="order"> The order of the classes is never changed inside the
 * classifier. The samples you pass to methods like <code>train, test</code>
 * and <code>evaluate</code> should always be in the same order that was
 * used when the object was instantiated.</a>
 * 
 * <br>
 * <br>
 * 
 * <b>For two classes it is highly recommended to set the foreground as first
 * class and the second class as background.</b>
 * 
 * @author Jens Keilwagen, Jan Grau
 */
public abstract class AbstractClassifier implements Storable, Cloneable {

	/**
	 * Returns an object of the parameters for the <code>evaluate</code>-method.
	 * The parameters can be set and the measures can be switched on or off.
	 * 
	 * @return an object of the parameters for the <code>evaluate</code>-method
	 * 
	 * @throws ParameterException
	 *             if something went wrong while constructing the parameter
	 *             object
	 * 
	 * @see AbstractClassifier#evaluate(MeasureParameters, boolean, Sample...)
	 */
	public static final MeasureParameters getMeasuresForEvaluate() throws ParameterException {
		return new MeasureParameters( false );
	}

	/**
	 * Returns an object of the parameters for the <code>evaluateAll</code>
	 * -method. The parameters can be set and the measures can be switched on or
	 * off.
	 * 
	 * @return an object of the parameters for the <code>evaluateAll</code>
	 *         -method
	 * 
	 * @throws ParameterException
	 *             if something went wrong while constructing the parameter
	 *             object
	 * 
	 * @see AbstractClassifier#evaluateAll(MeasureParameters, boolean,
	 *      Sample...)
	 */
	public static final MeasureParameters getMeasuresForEvaluateAll() throws ParameterException {
		return new MeasureParameters( true );
	}

	/**
	 * The underlying alphabet
	 */
	private AlphabetContainer alphabets;

	/**
	 * The underlying sequence length; <code>0</code> stands for sequences of
	 * arbitrary length (see {@link #getLength()}).
	 */
	private int length;

	/**
	 * The constructor for a homogeneous classifier. Such a classifier can
	 * handle sequences of arbitrary length.
	 * 
	 * @param abc
	 *            the {@link AlphabetContainer}
	 */
	public AbstractClassifier( AlphabetContainer abc ) {
		this( abc, 0 );
	}

	/**
	 * The constructor for an inhomogeneous classifier. Such a classifier can
	 * handle sequences of the given fixed length only (a length of
	 * <code>0</code> denotes arbitrary length, as used by
	 * {@link #AbstractClassifier(AlphabetContainer)}).
	 * 
	 * @param abc
	 *            the alphabets that are used
	 * @param length
	 *            the length of the sequences that can be classified
	 * 
	 * @throws IllegalArgumentException
	 *             if the length and the possible length of the
	 *             {@link AlphabetContainer} does not match
	 */
	public AbstractClassifier( AlphabetContainer abc, int length ) throws IllegalArgumentException {
		// a possible length of 0 reported by the container is accepted for any length
		int possibleLength = abc.getPossibleLength();
		if( possibleLength != 0 && possibleLength != length ) {
			throw new IllegalArgumentException( "The length and the possible length of the AlphabetContainer does not match." );
		}
		this.alphabets = abc;
		this.length = length;
	}

	/**
	 * The constructor for the {@link Storable} interface.
	 * 
	 * @param xml
	 *            the XML representation
	 * 
	 * @throws NonParsableException
	 *             if the XML format is not parsable or if the alphabets or the
	 *             length have not been set while parsing
	 */
	public AbstractClassifier( StringBuffer xml ) throws NonParsableException {
		// sentinel values that allow to detect whether parsing has set both fields
		alphabets = null;
		length = -1;
		fromXML( xml );
		if( length < 0 || alphabets == null ) {
			throw new NonParsableException( "The alphabets or the length were not set." );
		}
	}

	/**
	 * This method classifies a sequence and returns the index <code>i</code>,
	 * with <code>0 &lt;= i &lt; getNumberOfClasses()</code>, of the class to
	 * which the sequence is assigned.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method should check that the sequence is defined over the underlying
	 * alphabet and length.
	 * 
	 * @param seq
	 *            the sequence to be classified
	 * 
	 * @return the index of the class to which the sequence is assigned
	 * 
	 * @throws Exception
	 *             if the classifier is not trained or something is wrong with
	 *             the sequence
	 */
	public abstract byte classify( Sequence seq ) throws Exception;

	/**
	 * This method classifies all sequences of a sample and returns for each
	 * sequence the index <code>i</code>, with
	 * <code>0 &lt;= i &lt; getNumberOfClasses()</code>, of the class to which
	 * the sequence is assigned.
	 * 
	 * @param s
	 *            the sample to be classified
	 * 
	 * @return an array of class assignments, one entry per element of
	 *         <code>s</code> in enumeration order
	 * 
	 * @throws Exception
	 *             if something went wrong while classification
	 * 
	 * @see #classify(Sequence)
	 */
	public byte[] classify( Sample s ) throws Exception {
		byte[] clazz = new byte[s.getNumberOfElements()];
		ElementEnumerator ei = new ElementEnumerator( s );
		for( int i = 0; i < clazz.length; i++ ) {
			clazz[i] = classify( ei.nextElement() );
		}
		return clazz;
	}

	/**
	 * Returns a copy of this classifier as created by {@link Object#clone()},
	 * i.e. the fields declared in this class are copied as they are.
	 * 
	 * @return the clone
	 * 
	 * @throws CloneNotSupportedException
	 *             if <code>super.clone()</code> throws it
	 * 
	 * @see java.lang.Object#clone()
	 */
	@Override
	public AbstractClassifier clone() throws CloneNotSupportedException {
		return (AbstractClassifier)super.clone();
	}

	/**
	 * This method evaluates the classifier and computes all numerical results
	 * as for instance the sensitivity for a given specificity, the area under
	 * ROC curve and so on. This method should be used in any kind of classifier
	 * assessment as for instance crossvalidation, hold out sampling, ... .
	 * 
	 * <br>
	 * <br>
	 * 
	 * For two classes it is highly recommended to set the foreground as first
	 * class and the second class as background, i.e. the first sample should be
	 * the foreground sample and the second should be background sample. See
	 * also <a href="#order">this comment</a>.
	 * 
	 * @param params
	 *            the current parameters
	 * @param exceptionIfNotComputeable
	 *            if <code>true</code> the method throws an exception if a
	 *            measure could not be computed, otherwise it is ignored
	 * @param s
	 *            the array of {@link Sample}s
	 * 
	 * @return a set of numerical results
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see de.jstacs.classifier.assessment.ClassifierAssessment#assess(MeasureParameters,
	 *      de.jstacs.classifier.assessment.ClassifierAssessmentAssessParameterSet,
	 *      de.jstacs.utils.ProgressUpdater, Sample...)
	 * @see de.jstacs.classifier.assessment.ClassifierAssessment#assess(MeasureParameters,
	 *      de.jstacs.classifier.assessment.ClassifierAssessmentAssessParameterSet,
	 *      Sample...)
	 * @see de.jstacs.classifier.assessment.ClassifierAssessment#assess(MeasureParameters,
	 *      de.jstacs.classifier.assessment.ClassifierAssessmentAssessParameterSet,
	 *      de.jstacs.utils.ProgressUpdater, Sample[][][])
	 */
	@SuppressWarnings("unchecked")
	public final NumericalResultSet evaluate( MeasureParameters params, boolean exceptionIfNotComputeable, Sample... s ) throws Exception {
		// getResults is asked for numerical results only (all == false), hence the cast
		return new NumericalResultSet( (LinkedList<NumericalResult>)getResults( s, params, exceptionIfNotComputeable, false ) );
	}

	/**
	 * This method evaluates the classifier and computes all results.
	 * 
	 * @param params
	 *            the current parameters
	 * @param exceptionIfNotComputeable
	 *            if <code>true</code> the method throws an exception if a
	 *            measure could not be computed, otherwise it is ignored
	 * @param s
	 *            the array of {@link Sample}s
	 * 
	 * @return a set of results
	 * 
	 * @throws Exception
	 *             if something went wrong
	 */
	public final ResultSet evaluateAll( MeasureParameters params, boolean exceptionIfNotComputeable, Sample... s ) throws Exception {
		return new ResultSet( getResults( s, params, exceptionIfNotComputeable, true ) );
	}

	/**
	 * This method computes the results for any evaluation of the classifier.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This implementation is only able to compute the classification rate. If
	 * further measures are selected and
	 * <code>exceptionIfNotComputeable</code> is <code>true</code>, an
	 * {@link IllegalArgumentException} is thrown.
	 * 
	 * @param s
	 *            the array of {@link Sample}s
	 * @param params
	 *            the current parameters
	 * @param exceptionIfNotComputeable
	 *            if <code>true</code> the method throws an exception if a
	 *            measure could not be computed, otherwise it is ignored
	 * @param all
	 *            if <code>true</code> the method computes all results, if
	 *            <code>false</code> it computes only the numerical results
	 * 
	 * @return a list of results
	 * 
	 * @throws Exception
	 *             if something went wrong
	 * 
	 * @see AbstractClassifier#evaluate(MeasureParameters, boolean, Sample...)
	 * @see AbstractClassifier#evaluateAll(MeasureParameters, boolean,
	 *      Sample...)
	 */
	protected LinkedList<? extends Result> getResults( Sample[] s, MeasureParameters params, boolean exceptionIfNotComputeable, boolean all ) throws Exception {
		LinkedList<NumericalResult> list = new LinkedList<NumericalResult>();
		// counts the measures that have not been handled so far
		int numSelected = params.getNumberOfValues();
		if( params.isSelected( Measure.ClassificationRate ) ) {
			list.add( getClassificationRate( s ) );
			numSelected--;
		}
		if( exceptionIfNotComputeable && numSelected > 0 ) {
			throw new IllegalArgumentException( "There are measure that could not be evaluate with this classifier (" + this.getClass()
												+ ")" );
		}
		return list;
	}

	/**
	 * This method computes the classification rate for a given array of
	 * samples, i.e. the fraction of all elements that are assigned to the
	 * class corresponding to the index of their sample. Entries of
	 * <code>s</code> that are <code>null</code> are skipped, as in
	 * {@link #test(Sample...)}.
	 * 
	 * @param s
	 *            the array of samples; sample 0 contains only elements of class
	 *            0; sample 1 ...
	 * 
	 * @return the classification rate
	 * 
	 * @throws ClassDimensionException
	 *             if the number of samples differs from
	 *             {@link #getNumberOfClasses()}
	 * @throws Exception
	 *             if something went wrong while classification
	 */
	protected final NumericalResult getClassificationRate( Sample[] s ) throws Exception {
		if( s.length != getNumberOfClasses() ) {
			throw new ClassDimensionException();
		}
		int correct = 0;
		double n = 0;
		for( int i = 0; i < s.length; i++ ) {
			// the individual sample has to be checked (not the array itself),
			// otherwise a missing sample causes a NullPointerException
			if( s[i] != null ) {
				int elements = s[i].getNumberOfElements();
				n += elements;
				for( int j = 0; j < elements; j++ ) {
					if( this.classify( s[i].getElementAt( j ) ) == i ) {
						correct++;
					}
				}
			}
		}
		return new NumericalResult( Measure.ClassificationRate.getNameString(),
				Measure.ClassificationRate.getCommentString(),
				(double)correct / n );
	}

	/**
	 * This method returns the container of alphabets that is used in the
	 * classifier.
	 * 
	 * @return the used alphabet
	 */
	public final AlphabetContainer getAlphabetContainer() {
		return alphabets;
	}

	/**
	 * Returns some information characterizing or describing the current
	 * instance of the model. This could be for instance the number of edges for
	 * a Bayesian network or an image showing some representation of the model
	 * of a class. The set of characteristics should always include the XML
	 * representation of the classifier. The corresponding result type is
	 * <code>ObjectResult</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This implementation returns the results of
	 * {@link #getNumericalCharacteristics()} together with a
	 * {@link StorableResult} wrapping this classifier.
	 * 
	 * @return the characteristics
	 * 
	 * @throws Exception
	 *             an <code>Exception</code> is thrown if some of the
	 *             characteristics could not be defined
	 * 
	 * @see StorableResult
	 */
	public ResultSet getCharacteristics() throws Exception {
		// NOTE: the result name "classifer" is kept as it is, since it may be used to look up the result
		return new ResultSet( getNumericalCharacteristics().getResults(), new Result[]{ new StorableResult( "classifer",
				"the xml representation of the classifier",
				this ) } );
	}

	/**
	 * Returns a <b>short</b> description of the classifier.
	 * 
	 * @return a <b>short</b> description of the classifier
	 */
	public abstract String getInstanceName();

	/**
	 * Returns an array of {@link Result}s of dimension
	 * {@link #getNumberOfClasses()} that contains information about the
	 * classifier and for each class.<br>
	 * 
	 * <code>
	 * res[0] = new CategoricalResult( "classifier", "the kind of classifier", getInstanceName() );<br>
	 * res[1] = new CategoricalResult( "class info 0", "some information about the class", "info0" );<br>
	 * res[2] = new CategoricalResult( "class info 1", "some information about the class", "info1" );<br>
	 * ...
	 * </code>
	 * 
	 * @return an array of {@link Result}s that contains information about the
	 *         classifier
	 */
	public abstract CategoricalResult[] getClassifierAnnotation();

	/**
	 * Returns the length of the sequences this classifier can handle or
	 * <code>0</code> for sequences of arbitrary length.
	 * 
	 * @return the length
	 */
	public final int getLength() {
		return length;
	}

	/**
	 * Returns the subset of numerical values that are also returned by
	 * {@link #getCharacteristics()}.
	 * 
	 * @return the numerical characteristics
	 * 
	 * @throws Exception
	 *             an <code>Exception</code> is thrown if some of the
	 *             characteristics could not be defined
	 */
	public abstract NumericalResultSet getNumericalCharacteristics() throws Exception;

	/**
	 * Returns the number of classes that can be distinguished. For example if
	 * distinguish between foreground and background this method should return
	 * 2, even if you use a mixture model for either foreground or background.
	 * 
	 * @return the number of classes that can be distinguished
	 */
	public abstract int getNumberOfClasses();

	/**
	 * This method gives information about the state of the classifier.
	 * 
	 * @return <code>true</code> if the classifier is able to classify
	 *         sequences, otherwise <code>false</code>
	 */
	public abstract boolean isTrained();

	/**
	 * This method tries to set a new instance of an {@link AlphabetContainer}
	 * for the current model. <b>This instance has to be consistent with the
	 * underlying instance of an {@link AlphabetContainer}.</b>
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method can be very useful to save time.
	 * 
	 * @param abc
	 *            the alphabets
	 * 
	 * @return <code>true</code> if the new container could be set, i.e. if it
	 *         is consistent with the current one, otherwise <code>false</code>
	 *         (the current container is kept in this case)
	 * 
	 * @see AbstractClassifier#getAlphabetContainer()
	 * @see AlphabetContainer#checkConsistency(AlphabetContainer)
	 */
	public boolean setNewAlphabetContainerInstance( AlphabetContainer abc ) {
		if( abc.checkConsistency( alphabets ) ) {
			alphabets = abc;
			return true;
		} else {
			return false;
		}
	}

	/**
	 * This method computes the confusion matrix for a given array of test data.
	 * Entries of <code>testData</code> that are <code>null</code> are skipped.
	 * 
	 * @param testData
	 *            the given array of test data; the index of a sample is used as
	 *            the true class of its elements
	 * 
	 * @return the confusion matrix
	 * 
	 * @throws ClassDimensionException
	 *             if the number of samples is incorrect
	 * @throws Exception
	 *             if something went wrong
	 */
	public ConfusionMatrix test( Sample... testData ) throws Exception, ClassDimensionException {
		if( testData.length != getNumberOfClasses() ) {
			throw new ClassDimensionException();
		}

		ConfusionMatrix matrix = new ConfusionMatrix( testData.length );
		ElementEnumerator ei;
		for( int i = 0; i < testData.length; i++ ) {
			if( testData[i] != null ) {
				ei = new ElementEnumerator( testData[i] );
				while( ei.hasMoreElements() ) {
					// first argument: index of the sample, second argument: predicted class
					matrix.add( i, classify( ei.nextElement() ) );
				}
			}
		}
		return matrix;
	}

	/**
	 * Trains the {@link AbstractClassifier} object given the data as
	 * <code>Sample</code>s.<br>
	 * This method should work non-incrementally. That means the result of the
	 * following series: <code>train(data1); train(data2);</code> should be a
	 * fully trained model over <code>data2</code> and not over
	 * <code>data1, data2</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method should check that the <code>Sample</code>s are defined over
	 * the underlying alphabet and length.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This implementation delegates to {@link #train(Sample[], double[][])}
	 * using <code>null</code> as weights for each sample.
	 * 
	 * @param s
	 *            the data
	 *            <ul>
	 *            <li>either an array of {@link Sample}s:
	 *            <code>train( new Sample[]{s1,s2,s3})</code> or
	 *            <li>an enumeration of {@link Sample}s:
	 *            <code>train(s1,s2,s3)</code>
	 *            </ul>
	 * 
	 * @throws Exception
	 *             an {@link Exception} should be thrown if the training did not
	 *             succeed
	 */
	public void train( Sample... s ) throws Exception {
		// each entry of the weights array is null
		train( s, new double[s.length][] );
	}

	/**
	 * This method trains a classifier over an array of weighted
	 * <code>Sample</code>s. That is why the following has to be fulfilled:
	 * 
	 * <ul>
	 * <li> <code>s.length == weights.length</code>
	 * <li>and for all i:
	 * <code>weights[i] == null || s[i].getNumberOfElements() == weights[i].length</code>.
	 * </ul>
	 * 
	 * This method should work non-incrementally as the method
	 * <code>train( Sample[] )</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method should check that the <code>Sample</code>s are defined over
	 * the underlying alphabet and length.
	 * 
	 * @param s
	 *            an array of {@link Sample}s
	 * @param weights
	 *            the weights for <code>s</code>
	 * 
	 * @throws Exception
	 *             an {@link Exception} should be thrown if the weights are
	 *             incorrect or the training did not succeed
	 */
	public abstract void train( Sample[] s, double[][] weights ) throws Exception;

	// methods for Storable

	/**
	 * Returns the {@link String} that is used as tag for the XML
	 * representation. This method is used by the methods
	 * {@link #fromXML(StringBuffer)} and {@link #toXML()}.
	 * 
	 * @return the {@link String} that is used as tag for the XML representation
	 */
	protected abstract String getXMLTag();

	/**
	 * Restores the alphabets and the length from the XML representation and
	 * hands the remaining content to
	 * {@link #extractFurtherClassifierInfosFromXML(StringBuffer)}. This is the
	 * counterpart of {@link #toXML()}.
	 * 
	 * @param representation
	 *            the XML representation enclosed in the tag returned by
	 *            {@link #getXMLTag()}
	 * 
	 * @throws NonParsableException
	 *             if the representation could not be parsed
	 */
	private void fromXML( StringBuffer representation ) throws NonParsableException {
		StringBuffer xml = XMLParser.extractForTag( representation, getXMLTag() );
		alphabets = (AlphabetContainer)XMLParser.extractStorableForTag( xml, "alphabetcontainer" );
		length = XMLParser.extractIntForTag( xml, "length" );
		extractFurtherClassifierInfosFromXML( xml );
	}

	/**
	 * Extracts further information of a classifier from an XML representation.
	 * This method is used by the method {@link #fromXML(StringBuffer)} and
	 * should not be made public.
	 * 
	 * @param xml
	 *            XML representation
	 * @throws NonParsableException
	 *             if the information could not be parsed out of the
	 *             {@link StringBuffer}
	 */
	protected abstract void extractFurtherClassifierInfosFromXML( StringBuffer xml ) throws NonParsableException;

	/**
	 * Returns the XML representation of the classifier. It consists of the
	 * alphabets, the length and the content returned by
	 * {@link #getFurtherClassifierInfos()}, enclosed in the tag returned by
	 * {@link #getXMLTag()}.
	 * 
	 * @return the XML representation
	 * 
	 * @see de.jstacs.Storable#toXML()
	 */
	public final StringBuffer toXML() {
		StringBuffer xml = new StringBuffer( 100000 );
		XMLParser.appendStorableWithTags( xml, alphabets, "alphabetcontainer" );
		XMLParser.appendIntWithTags( xml, length, "length" );
		xml.append( getFurtherClassifierInfos() );
		XMLParser.addTags( xml, getXMLTag() );
		return xml;
	}

	/**
	 * This method returns further information of a classifier as a
	 * {@link StringBuffer}. This method is used by the method {@link #toXML()}
	 * and should not be made public.
	 * 
	 * @return further information of a classifier as a {@link StringBuffer}
	 */
	protected abstract StringBuffer getFurtherClassifierInfos();
}
