/*
 * This file is part of Jstacs.
 *
 * Jstacs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Jstacs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jstacs.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.data;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.jstacs.WrongAlphabetException;
import de.jstacs.data.Sample.WeightedSampleFactory.SortOperation;
import de.jstacs.data.sequences.ArbitrarySequence;
import de.jstacs.data.sequences.ByteSequence;
import de.jstacs.data.sequences.IntSequence;
import de.jstacs.data.sequences.ShortSequence;
import de.jstacs.data.sequences.WrongSequenceTypeException;
import de.jstacs.io.StringExtractor;
import de.jstacs.io.SymbolExtractor;

/**
 * This is the class for any sample of sequences. All sequences in a sample have to have the same AlphabetContainer. The
 * sequences may have different lengths.
 * 
 * <br>
 * <br>
 * 
 * For the internal representation the class Sequence is used, where the external alphabet is converted to integral
 * numerical values. The class Sample knows about this coding via instances of class AlphabetContainer respectively
 * Alphabet.
 * 
 * <br>
 * <br>
 * 
 * <a name="access"> There are different ways to access the elements of a Sample. If one needs random access there's the
 * method <a href="#getElementAt">getElementAt( int i )</a>. For fast sequential access it is recommended to use an <a
 * href="#ElementEnumerator">ElementEnumerator</a>. </a>
 * 
 * <br>
 * <br>
 * 
 * Sample is immutable.
 * 
 * @author Jens Keilwagen
 * 
 * @see AlphabetContainer
 * @see Sequence
 */
public class Sample
{
	/**
	 * The criteria by which a sample can be split into parts.
	 * 
	 * @author Jens Keilwagen
	 * 
	 * @see Sample#partition(PartitionMethod, double...)
	 * @see Sample#partition(int, PartitionMethod)
	 * @see Sample#partition(double, PartitionMethod, int)
	 */
	public static enum PartitionMethod {
		/**
		 * The sample is split according to the number of elements (sequences).
		 */
		PARTITION_BY_NUMBER_OF_ELEMENTS,

		/**
		 * The sample is split according to the number of &quot;symbols&quot;.
		 */
		PARTITION_BY_NUMBER_OF_SYMBOLS;
	}
	
	/**
	 * Builds one combined annotation for several samples: the individual annotations are joined by
	 * <code>", "</code> and enclosed in square brackets.
	 * 
	 * @param s
	 *            the samples; may be <code>null</code> or empty
	 * 
	 * @return the combined annotation, <code>"[]"</code> if no sample is given
	 */
	public static final String getAnnotation( Sample... s )
	{
		if( s == null || s.length == 0 )
		{
			return "[]";
		}
		StringBuilder joined = new StringBuilder( s.length * 100 );
		joined.append( "[" );
		for( int idx = 0; idx < s.length; idx++ )
		{
			if( idx > 0 )
			{
				joined.append( ", " );
			}
			joined.append( s[idx].getAnnotation() );
		}
		joined.append( "]" );
		return joined.toString();
	}

	/**
	 * This method computes the intersection between all elements of the array, i.e.
	 * it returns a Sample containing only sequences that are contained in all Samples of
	 * the array. A sequence that occurs several times is added as often as the smallest
	 * weight reported for it by any of the samples.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The samples are wrapped in {@link WeightedSampleFactory} instances created with
	 * {@link SortOperation#SORT_BY_SEQUENCE} and then traversed in parallel, comparing the
	 * String representations of the sequences.
	 * 
	 * @param samples the array
	 * 
	 * @return the intersection of the elements in the array
	 * 
	 * @throws IllegalArgumentException if no sample is given or the elements of the array are from different domains
	 * @throws EmptySampleException if the intersection is empty
	 */
	public static final Sample intersection( Sample... samples ) throws IllegalArgumentException, EmptySampleException
	{
		if( samples == null || samples.length == 0 )
		{
			// previously this ended in an undocumented NullPointerException/ArrayIndexOutOfBoundsException
			throw new IllegalArgumentException( "At least one sample has to be given." );
		}
		WeightedSampleFactory[] wsf = new WeightedSampleFactory[samples.length];
		// index[i] is the current read position in wsf[i]
		int[] index = new int[samples.length];
		int i = 0, len = -1;
		double anz;
		AlphabetContainer abc = samples[0].getAlphabetContainer();
		while( i < wsf.length )
		{
			if( !abc.checkConsistency( samples[i].getAlphabetContainer() ) )
			{
				throw new IllegalArgumentException( "The sample do not have the same AlphabetContainer." );
			}
			try
			{
				wsf[i] = new WeightedSampleFactory( SortOperation.SORT_BY_SEQUENCE, samples[i++] );
			}
			catch( WrongAlphabetException doesNotHappen )
			{
				// keep the original exception as cause instead of copying only message and stack trace
				throw new RuntimeException( doesNotHappen.getMessage(), doesNotHappen );
			}
			catch( WrongLengthException doesNotHappen )
			{
				throw new RuntimeException( doesNotHappen.getMessage(), doesNotHappen );
			}
		}
		boolean goOn = true, same;
		ArrayList<Sequence> list = new ArrayList<Sequence>( 100 );
		String current, help;
		do
		{
			// find candidate: the largest of the current heads
			current = wsf[0].getElementAt( index[0] ).toString();
			for( i = 1; i < samples.length; i++ )
			{
				help = wsf[i].getElementAt( index[i] ).toString();
				if( current.compareTo( help ) < 0 )
				{
					current = help;
				}
			}

			// skip everything that is smaller than the candidate and check whether all heads match it
			same = true;
			for( i = 0; i < samples.length; i++ )
			{
				help = "";
				while( index[i] < wsf[i].getNumberOfElements()
						&& current.compareTo( (help = wsf[i].getElementAt( index[i] ).toString()) ) > 0 )
				{
					index[i]++;
				}
				same &= current.equals( help );
			}

			if( same )
			{
				// the sequence is contained in all samples: use the minimal weight as multiplicity
				anz = wsf[0].getWeight( index[0] );
				for( i = 1; i < samples.length; i++ )
				{
					if( anz > wsf[i].getWeight( index[i] ) )
					{
						anz = wsf[i].getWeight( index[i] );
					}
					index[i]++;
				}

				Sequence common = wsf[0].getElementAt( index[0] );
				if( list.size() == 0 )
				{
					len = common.getLength();
				}
				else if( len != common.getLength() )
				{
					// differing lengths: 0 marks a sample without common element length
					len = 0;
				}
				for( i = 0; i < anz; i++ )
				{
					list.add( common );
				}
				index[0]++;
			}

			// stop as soon as one of the samples is exhausted
			for( i = 0; i < samples.length; i++ )
			{
				if( index[i] == wsf[i].getNumberOfElements() )
				{
					goOn = false;
				}
			}
		}
		while( goOn );
		// the constructor throws an EmptySampleException if nothing was collected
		return new Sample( abc, list.toArray( new Sequence[0] ), len, "intersection of " + getAnnotation( samples ) );
	}

	/**
	 * Unites those samples of <code>s</code> that are selected by <code>in</code>. The work is delegated to
	 * {@link Sample#union(Sample[], boolean[], int)} with a subsequence length of 0.
	 * 
	 * @param s
	 *            the Samples
	 * @param in
	 *            the selection; the sample <code>s[i]</code> is part of the union if <code>in[i]==true</code>
	 * 
	 * @return the united sample
	 * 
	 * @throws IllegalArgumentException
	 *             if s.length != in.length or the alphabets do not match; a {@link WrongLengthException} of the
	 *             delegate is also reported as IllegalArgumentException
	 * 
	 * @throws EmptySampleException
	 *             if the union is empty
	 */
	public static final Sample union( Sample[] s, boolean[] in ) throws IllegalArgumentException, EmptySampleException
	{
		Sample united;
		try
		{
			united = union( s, in, 0 );
		}
		catch( WrongLengthException wrongLength )
		{
			IllegalArgumentException wrapped = new IllegalArgumentException( wrongLength.getMessage() );
			wrapped.setStackTrace( wrongLength.getStackTrace() );
			throw wrapped;
		}
		return united;
	}

	/**
	 * Unites all samples of <code>s</code> by selecting every entry and delegating to
	 * {@link Sample#union(Sample[], boolean[])}.
	 * 
	 * @param s the samples
	 * 
	 * @return the united sample, or <code>null</code> if <code>s</code> is <code>null</code> or empty
	 *  
	 * @throws IllegalArgumentException if the alphabets do not match
	 * 
	 * @see Sample#union(Sample[], boolean[])
	 */
	public static final Sample union( Sample... s ) throws IllegalArgumentException
	{
		if( s == null || s.length == 0 )
		{
			return null;
		}
		boolean[] useAll = new boolean[s.length];
		Arrays.fill( useAll, true );
		Sample united = null;
		try
		{
			united = union( s, useAll );
		}
		catch( EmptySampleException doesNotHappen )
		{
			// since each given sample is not empty, the union can't be empty; null is returned in that case
		}
		return united;
	}

	/**
	 * This method unites all Sample from <code>s</code> regarding <code>in</code> and sets the element length in
	 * the united sample to <code>subsequenceLength</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The united sample gets a fixed element length only if all selected samples report the same non-zero
	 * {@link Sample#getElementLength()}; otherwise its element length is 0.
	 * 
	 * @param s
	 *            the Samples
	 * @param in
	 *            an array indicating which sample is used in the union, if <code>in[i]==true</code> the sample
	 *            <code>s[i]</code> is used.
	 * @param subsequenceLength
	 *            the length of the elements in the united sample
	 * 
	 * @return the united sample, or <code>null</code> if <code>s</code> is <code>null</code> or empty or if no
	 *         sample is selected
	 * 
	 * @throws IllegalArgumentException
	 *             if <code>in</code> is <code>null</code>, s.length != in.length or the alphabets do not match
	 * @throws EmptySampleException
	 *             if the union is empty
	 * @throws WrongLengthException
	 *             if the united sample does not support this subsequenceLength
	 */
	public static final Sample union( Sample[] s, boolean[] in, int subsequenceLength )
			throws IllegalArgumentException, EmptySampleException, WrongLengthException
	{
		if( s == null || s.length == 0 )
		{
			return null;
		}
		if( in == null || in.length != s.length )
		{
			throw new IllegalArgumentException( "The arrays have to have the same dimension." );
		}

		// find the first selected sample; it defines the reference alphabet
		int l = s.length, start = 0;
		while( start < l && !in[start] )
		{
			start++;
		}
		if( start == l )
		{
			return null;
		}

		AlphabetContainer abc = s[start].alphabetContainer;
		// use the element length (0 for mixed lengths), not the minimal length: otherwise a sample with
		// sequences of different lengths would yield a union that claims a fixed element length
		int len = s[start].getElementLength();
		int anz = s[start].getNumberOfElements();
		StringBuilder annot = new StringBuilder( "the union of [" );
		annot.append( s[start].getAnnotation() );
		int i;
		for( i = start + 1; i < l; i++ )
		{
			if( !in[i] )
			{
				continue;
			}
			if( !abc.checkConsistency( s[i].alphabetContainer ) )
			{
				throw new IllegalArgumentException( "The alphabets of the samples do not match." );
			}
			anz += s[i].getNumberOfElements();
			if( len != 0 && len != s[i].getElementLength() )
			{
				len = 0;
			}
			annot.append( ", " );
			annot.append( s[i].getAnnotation() );
		}
		if( len != 0 && len < subsequenceLength )
		{
			// would be asked later anyway, but saves time to do now;
			// without a common length the decision is left to setSubsequenceLength
			throw new WrongLengthException( subsequenceLength );
		}

		// collect the elements of all selected samples
		Sequence[] seqs = new Sequence[anz];
		ElementEnumerator ei;
		anz = 0;
		for( i = 0; i < l; i++ )
		{
			if( in[i] )
			{
				ei = new ElementEnumerator( s[i] );
				while( ei.hasMoreElements() )
				{
					seqs[anz++] = ei.nextElement();
				}
			}
		}

		annot.append( "]" );
		Sample res = new Sample( abc, seqs, len, annot.toString() );
		res.setSubsequenceLength( subsequenceLength );

		return res;
	}

	/**
	 * Unites all samples of <code>s</code> and sets the element length of the united sample to
	 * <code>subsequenceLength</code>. Every entry of <code>s</code> is selected and the work is delegated to
	 * {@link Sample#union(Sample[], boolean[], int)}.
	 * 
	 * @param s
	 *            the Samples
	 * @param subsequenceLength
	 *            the length of the elements in the united sample
	 * 
	 * @return the united sample, or <code>null</code> if <code>s</code> is <code>null</code> or empty
	 * 
	 * @throws IllegalArgumentException
	 *             if the alphabets do not match
	 * @throws WrongLengthException
	 *             if the united sample does not support this subsequenceLength
	 *
	 * @see Sample#union(Sample[], boolean[], int)
	 */
	public static final Sample union( Sample[] s, int subsequenceLength ) throws IllegalArgumentException,
			WrongLengthException
	{
		if( s == null || s.length == 0 )
		{
			return null;
		}
		boolean[] useAll = new boolean[s.length];
		Arrays.fill( useAll, true );
		Sample united = null;
		try
		{
			united = union( s, useAll, subsequenceLength );
		}
		catch( EmptySampleException doesNotHappen )
		{
			// since each given sample is not empty, the union can't be empty; null is returned in that case
		}
		return united;
	}

	/**
	 * Some annotation for the sample.
	 */
	private String annotation;

	/**
	 * The AlphabetContainer all sequences of this sample are defined over.
	 */
	private AlphabetContainer alphabetContainer;

	/**
	 * All sequences.
	 */
	private Sequence[] seqs;

	/**
	 * The length of the elements; 0 (zero) if the elements do not share a common length.
	 */
	private int length;

	/**
	 * The index of the first subsequence for each sequence. Since for sequence 0 the index is always 0 the indices are
	 * shifted one position. That's why the last entry of the array is free and is used for the number of elements.
	 * The array is <code>null</code> if the elements of the sample are the sequences themselves.
	 */
	private int[] indexOfFirstSubseq;

	/**
	 * Internal constructor used by the static and instance methods of this class that assemble a new sample from
	 * existing sequences. The array <code>seqs</code> is stored as given, it is not copied.
	 * 
	 * <br>
	 * <br>
	 * 
	 * <b>You have to ensure that all sequences are defined over the alphabet(s) <code>abc</code>, since it is not
	 * checked internally! Furthermore you have to ensure that the length is correct!</b>
	 * 
	 * @param abc
	 *            the alphabets
	 * @param seqs
	 *            the sequences
	 * @param length
	 *            the length of the sequences
	 * @param annotation
	 *            the annotation of the sample
	 * 
	 * @throws EmptySampleException
	 *             if the array <code>seqs</code> is <code>null</code> or its length is 0
	 */
	private Sample( AlphabetContainer abc, Sequence[] seqs, int length, String annotation ) throws EmptySampleException
	{
		boolean nothingGiven = seqs == null || seqs.length == 0;
		if( nothingGiven )
		{
			throw new EmptySampleException();
		}
		this.annotation = annotation;
		this.length = length;
		this.seqs = seqs;
		this.alphabetContainer = abc;
	}

	/**
	 * Creates a Sample from a StringExtractor using the given AlphabetContainer. The delimiter of the
	 * AlphabetContainer (<code>abc.getDelim()</code>) is used for parsing and a subsequence length of 0 is passed on.
	 * 
	 * @param abc
	 *            the AlphabetContainer
	 * @param se
	 *            the StringExtractor
	 * 
	 * @throws WrongAlphabetException
	 *             if the AlphabetContainer is not suitable
	 * @throws EmptySampleException
	 *             if the Sample would be empty
	 * @throws WrongLengthException
	 *             not expected here, since no subsequence length is requested
	 */
	public Sample( AlphabetContainer abc, StringExtractor se ) throws WrongAlphabetException, EmptySampleException,
			WrongLengthException
	{
		this( abc, se, abc.getDelim(), 0 );
	}

	/**
	 * Creates a Sample from a StringExtractor using the given AlphabetContainer and all overlapping windows of
	 * <code>subsequenceLength</code>. The delimiter of the AlphabetContainer (<code>abc.getDelim()</code>) is used
	 * for parsing.
	 * 
	 * @param abc
	 *            the AlphabetContainer
	 * @param se
	 *            the StringExtractor
	 * @param subsequenceLength
	 *            the length of the window sliding over the Strings of <code>se</code>. If
	 *            <code>subsequenceLength</code> is 0 (zero) then the sequences are used as given from the
	 *            StringExtractor.
	 * 
	 * @throws WrongAlphabetException
	 *             if the AlphabetContainer is not suitable
	 * @throws WrongLengthException
	 * 			   if the subsequence length is not supported
	 * @throws EmptySampleException
	 *             if the Sample would be empty
	 */
	public Sample( AlphabetContainer abc, StringExtractor se, int subsequenceLength ) throws WrongAlphabetException,
			WrongLengthException, EmptySampleException
	{
		this( abc, se, abc.getDelim(), subsequenceLength );
	}

	/**
	 * Creates a Sample from a StringExtractor using the given AlphabetContainer and delimiter. A subsequence length
	 * of 0 is passed on.
	 * 
	 * @param abc
	 *            the AlphabetContainer
	 * @param se
	 *            the StringExtractor
	 * @param delim
	 *            the delimiter for parsing the Strings
	 * 
	 * @throws WrongAlphabetException
	 *             if the AlphabetContainer is not suitable
	 * @throws EmptySampleException
	 *             if the Sample would be empty
	 * @throws WrongLengthException
	 *             not expected here, since no subsequence length is requested
	 */
	public Sample( AlphabetContainer abc, StringExtractor se, String delim ) throws WrongAlphabetException,
			EmptySampleException, WrongLengthException
	{
		this( abc, se, delim, 0 );
	}

	/**
	 * Creates a Sample from a StringExtractor using the given AlphabetContainer, the given delimiter and all
	 * overlapping windows of <code>subsequenceLength</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * For a discrete AlphabetContainer the sequences are stored as {@link ByteSequence}, {@link ShortSequence} or
	 * {@link IntSequence}, depending on the maximal alphabet length; otherwise {@link ArbitrarySequence} is used.
	 * The element length of the sample is the number of symbols of the Strings if all Strings have the same
	 * number of symbols, and 0 otherwise.
	 * 
	 * @param abc
	 *            the AlphabetContainer
	 * @param se
	 *            the StringExtractor
	 * @param delim
	 *            the delimiter for parsing the Strings
	 * @param subsequenceLength
	 *            the length of the window sliding over the Strings of <code>se</code>. If
	 *            <code>subsequenceLength</code> is 0 (zero) then the sequences are used as given from the
	 *            StringExtractor.
	 * 
	 * @throws WrongAlphabetException
	 *             if the AlphabetContainer is not suitable
	 * @throws EmptySampleException
	 *             if the Sample would be empty
	 * @throws WrongLengthException
	 * 			   if the subsequence length is not supported
	 * @throws IllegalArgumentException
	 *             if the AlphabetContainer is not discrete and <code>delim</code> is empty
	 */
	public Sample( AlphabetContainer abc, StringExtractor se, String delim, int subsequenceLength )
			throws EmptySampleException, WrongAlphabetException, WrongLengthException
	{
		alphabetContainer = abc;
		int i, k = se.getNumberOfStrings();
		if( k == 0 )
		{
			throw new EmptySampleException();
		}
		seqs = new Sequence[k];
		SymbolExtractor temp = new SymbolExtractor( delim );
		// the first String defines the reference length
		temp.setStringToBeParsed( se.getString( 0 ) );
		length = temp.countElements();
		try
		{
			boolean discrete = alphabetContainer.isDiscrete();
			int l = 0;
			if( discrete )
			{
				// check the range before narrowing to int; after the cast the test could never fail
				double symbols = alphabetContainer.getMaximalAlphabetLength();
				if( symbols > Integer.MAX_VALUE )
				{
					throw new WrongAlphabetException( "Could not encode. Too many symbols." );
				}
				l = (int) symbols;
			}
			else if( delim.length() == 0 )
			{
				// hybrid or pure continuous samples can not be parsed without a delimiter
				throw new IllegalArgumentException( "delim has to be not empty" );
			}

			for( i = 0; i < seqs.length; i++ )
			{
				temp.setStringToBeParsed( se.getString( i ) );
				if( length != 0 && temp.countElements() != length )
				{
					// the Strings differ in their number of symbols
					length = 0;
				}
				if( !discrete )
				{
					// create hybrid or pure continuous sample
					seqs[i] = new ArbitrarySequence( alphabetContainer, null, temp );
				}
				else if( l <= Byte.MAX_VALUE )
				{
					seqs[i] = new ByteSequence( alphabetContainer, null, temp );
				}
				else if( l <= Short.MAX_VALUE )
				{
					seqs[i] = new ShortSequence( alphabetContainer, null, temp );
				}
				else
				{
					seqs[i] = new IntSequence( alphabetContainer, null, temp );
				}
			}
		}
		catch( WrongSequenceTypeException e )
		{
			// not expected, since the sequence type is chosen to fit the AlphabetContainer; keep the cause
			throw new RuntimeException( e.getMessage(), e );
		}
		setSubsequenceLength( subsequenceLength );
		if( subsequenceLength > 0 )
		{
			this.annotation = "all subsequences of length " + subsequenceLength + " from " + se.getAnnotation();
		}
		else
		{
			this.annotation = se.getAnnotation();
		}
	}

	/**
	 * This constructor enables you to use subsequences of the elements of a sample. It delegates to the private
	 * constructor with <code>copy == false</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * It can also be used to ensure that all sequences that can be accessed by <code>getElementAt( int i )</code> are
	 * real objects and do not have to be created at the invocation of the method. (The same holds for the
	 * ElementEnumerator. In those cases both ways to access the sequence are approximately equally fast.)
	 * 
	 * @param s
	 *            the sample
	 * @param subsequenceLength
	 *            the new element length
	 * 
	 * @throws WrongLengthException
	 *             if something is wrong with subsequenceLength
	 */
	public Sample( Sample s, int subsequenceLength ) throws WrongLengthException
	{
		this(s,subsequenceLength,false);
	}
	
	/**
	 * This constructor enables you to use subsequences of the elements of a sample.
	 * 
	 * <br>
	 * <br>
	 * 
	 * It can also be used to ensure that all sequences that can be accessed by <code>getElementAt( int i )</code> are
	 * real objects and do not have to be created at the invocation of the method. (The same holds for the
	 * ElementEnumerator. In those cases both ways to access the sequence are approximately equally fast.)
	 * 
	 * If <code>copy</code> is <code>true</code> all subsequences are copied to form a new <code>Sample</code>.
	 *
	 * @param s
	 *            the sample
	 * @param subsequenceLength
	 *            the new element length
	 * @param copy true if the subsequences shall be copied
	 *
	 * @throws WrongLengthException
	 *             if something is wrong with subsequenceLength
	 */
	private Sample( Sample s, int subsequenceLength, boolean copy) throws WrongLengthException {
		if(copy){
			this.alphabetContainer = s.alphabetContainer;
			this.seqs = s.getAllElements();
			setSubsequenceLength( subsequenceLength );
			this.seqs = this.getAllElements();
			this.indexOfFirstSubseq = null;
			this.length = subsequenceLength;
			this.annotation = "all subsequences of length " + subsequenceLength + " from " + s.annotation;
		}else{
			this.alphabetContainer = s.alphabetContainer;
			if( s.indexOfFirstSubseq == null )
			{
				this.seqs = s.seqs;
			}
			else
			{
				this.seqs = s.getAllElements();
			}
			this.length = s.length;
			setSubsequenceLength( subsequenceLength );
			this.annotation = "all subsequences of length " + subsequenceLength + " from " + s.annotation;
		}
		
	}

	/**
	 * Creates a sample directly from sequences. This constructor is specially designed for the method
	 * {@link de.jstacs.models.Model#emitSample(int, int...)}.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The references are copied into a new array. The AlphabetContainer of the first sequence is used for the
	 * sample. The element length is the common length of the sequences, or 0 if they differ in length.
	 * 
	 * @param annotation 
	 * 			  the annotation of the sample
	 * @param seqs
	 *            the sequence(s)
	 * 
	 * @throws EmptySampleException
	 *             if the array <code>seqs</code> is <code>null</code> or the length is 0
	 * @throws IllegalArgumentException
	 *  		   if the alphabets do not match
	 */
	public Sample( String annotation, Sequence... seqs ) throws EmptySampleException, IllegalArgumentException
	{
		if( seqs == null || seqs.length == 0 )
		{
			throw new EmptySampleException();
		}
		Sequence first = seqs[0];
		this.alphabetContainer = first.getAlphabetContainer();
		this.seqs = new Sequence[seqs.length];
		this.length = first.getLength();
		this.seqs[0] = first;
		for( int pos = 1; pos < seqs.length; pos++ )
		{
			Sequence candidate = seqs[pos];
			this.seqs[pos] = candidate;
			if( candidate.getLength() != this.length )
			{
				this.length = 0;
			}
			if( !this.alphabetContainer.checkConsistency( candidate.getAlphabetContainer() ) )
			{
				throw new IllegalArgumentException( "The sequences are not defined over the same AlphabetContainer." );
			}
		}
		this.indexOfFirstSubseq = null;
		this.annotation = annotation;
	}

	/**
	 * Collects all elements of this Sample in a new array, in the order given by an {@link ElementEnumerator}.
	 * 
	 * @return all elements (sequences) of this Sample
	 */
	public Sequence[] getAllElements()
	{
		final int count = getNumberOfElements();
		Sequence[] all = new Sequence[count];
		ElementEnumerator walker = new ElementEnumerator( this );
		int filled = 0;
		while( filled < count )
		{
			all[filled++] = walker.nextElement();
		}
		return all;
	}

	/**
	 * Gives access to the AlphabetContainer the sequences of this Sample are defined over.
	 * 
	 * @return the AlphabetContainer of this Sample
	 */
	public final AlphabetContainer getAlphabetContainer()
	{
		return this.alphabetContainer;
	}

	/**
	 * Gives access to the annotation stored for this sample.
	 * 
	 * @return some annotation of the sample
	 */
	public final String getAnnotation()
	{
		return this.annotation;
	}

	/**
	 * Creates a new sample in which each element is a composite sequence built from chunks of the corresponding
	 * element of the current sample. The element length of the new sample is the sum of <code>lengths</code>.
	 * 
	 * @param starts
	 *            the start positions
	 * @param lengths
	 *            the lengths of the chunks
	 * 
	 * @return a composite sample
	 * 
	 * @throws IllegalArgumentException
	 *             if either start or length or both in combination are not suitable
	 */
	public final Sample getCompositeSample( int[] starts, int[] lengths ) throws IllegalArgumentException
	{
		AlphabetContainer compositeAbc = alphabetContainer.getCompositeContainer( starts, lengths );
		Sequence[] composed = new Sequence[getNumberOfElements()];
		ElementEnumerator walker = new ElementEnumerator( this );
		for( int idx = 0; idx < composed.length; idx++ )
		{
			composed[idx] = walker.nextElement().getCompositeSequence( compositeAbc, starts, lengths );
		}

		int total = 0;
		for( int chunk : lengths )
		{
			total += chunk;
		}

		String description = "composite sample (starts=" + Arrays.toString( starts ) + ", lengths="
				+ Arrays.toString( lengths ) + ") of " + annotation;
		try
		{
			return new Sample( compositeAbc, composed, total, description );
		}
		catch( EmptySampleException doesNotHappen )
		{
			// since the current sample is not empty, a composite sample can't be empty
			return null;
		}
	}

	/**
	 * <a name="getElementAt"></a>Returns the element with index <code>i</code>. See also <a
	 * href="#access">this comment</a>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If the elements are the sequences themselves, the stored sequence is returned. Otherwise the sequence
	 * containing the element is looked up and the requested subsequence is obtained from it.
	 * 
	 * @param i
	 *            the index
	 * 
	 * @return the element
	 */
	public Sequence getElementAt( int i )
	{
		if( indexOfFirstSubseq == null )
		{
			return seqs[i];
		}
		int seqInd = getIndexOfSeq( i );
		// the indices are shifted by one position, sequence 0 always starts at 0
		int firstOfSeq = ( seqInd == 0 ) ? 0 : indexOfFirstSubseq[seqInd - 1];
		int startPos = i - firstOfSeq;
		Sequence host = seqs[seqInd];
		if( length != 0 )
		{
			return host.getSubSequence( startPos, length );
		}
		return host.getSubSequence( startPos );
	}

	/**
	 * Gives access to the element length stored for this Sample.
	 * 
	 * @return the length of the elements in this Sample
	 */
	public int getElementLength()
	{
		return this.length;
	}

	/**
	 * This method enables you to use only an infix of all elements in the current sample. The subsequences will be
	 * returned in a new sample.
	 * 
	 * <br>
	 * <br>
	 * 
	 * This method can also be used to create a sample of prefixes if the element length is not zero. If the infix
	 * covers the complete elements (<code>start == 0</code> and <code>length</code> equals the element length),
	 * this sample itself is returned.
	 * 
	 * @param start
	 *            the start position of the infix, must not be negative
	 * @param length
	 *            the length of the infix, has to be positive
	 * 
	 * @return a sample of specified infixes
	 * 
	 * @throws IllegalArgumentException
	 *             if either start or length or both in combination are not suitable
	 */
	public final Sample getInfixSample( int start, int length ) throws IllegalArgumentException
	{
		if( length <= 0 )
		{
			throw new IllegalArgumentException( "The length has to be positive." );
		}
		int available = this.length == 0 ? getMinimalElementLength() : this.length;
		// reject negative starts explicitly; the subtraction form can not overflow for start >= 0
		if( start < 0 || available - start < length )
		{
			throw new IllegalArgumentException( "The values for start and length or not suitable." );
		}
		if( start == 0 && length == this.length )
		{
			return this;
		}

		AlphabetContainer abc = alphabetContainer.getSubContainer( start, length );
		Sequence[] n = new Sequence[getNumberOfElements()];
		ElementEnumerator ei = new ElementEnumerator( this );
		for( int i = 0; i < n.length; i++ )
		{
			n[i] = ei.nextElement().getSubSequence( abc, start, length );
		}
		try
		{
			return new Sample( abc, n, length, "infix sample (start=" + start + ", length=" + length + ") of "
					+ annotation );
		}
		catch( EmptySampleException doesNotHappen )
		{
			// since the current sample is not empty, a sample of infixes can't be empty
			return null;
		}
	}

	/**
	 * Determines the minimal length of an element in this Sample. A non-zero element length is returned directly;
	 * a sample of subsequences without fixed element length yields 0; otherwise the elements are scanned.
	 * 
	 * @return the minimal length of an element in this Sample
	 */
	public int getMinimalElementLength()
	{
		if( length != 0 )
		{
			return length;
		}
		if( indexOfFirstSubseq != null )
		{
			return 0;
		}
		int shortest = Integer.MAX_VALUE;
		ElementEnumerator walker = new ElementEnumerator( this );
		// a length of 0 can not be undercut, so the scan stops there
		while( walker.hasMoreElements() && shortest != 0 )
		{
			shortest = Math.min( shortest, walker.nextElement().getLength() );
		}
		return shortest;
	}
	
	/**
	 * Determines the maximal length of an element in this Sample. A non-zero element length is returned directly;
	 * otherwise all elements are scanned.
	 * 
	 * @return the maximal length of an element in this Sample
	 */
	public int getMaximalElementLength()
	{
		if( length != 0 )
		{
			return length;
		}
		int longest = Integer.MIN_VALUE;
		ElementEnumerator walker = new ElementEnumerator( this );
		while( walker.hasMoreElements() )
		{
			longest = Math.max( longest, walker.nextElement().getLength() );
		}
		return longest;
	}

	/**
	 * Determines the number of elements in this Sample. For a sample of subsequences the number is read from the
	 * last entry of the subsequence index, otherwise it is the number of stored sequences.
	 * 
	 * @return the number of elements in this Sample
	 */
	public int getNumberOfElements()
	{
		return ( indexOfFirstSubseq != null ) ? indexOfFirstSubseq[seqs.length - 1] : seqs.length;
	}

	/**
	 * Counts the overlapping elements of length <code>len</code> that can be extracted from the stored sequences,
	 * i.e. <code>(sequence length - len + 1)</code> summed over all sequences.
	 * 
	 * @param len
	 *            the length of the elements
	 * 
	 * @return the number of elements with the specified length
	 * 
	 * @throws WrongLengthException
	 *             if the given length is bigger than the minimal element length
	 */
	public int getNumberOfElementsWithLength( int len ) throws WrongLengthException
	{
		if( length != 0 )
		{
			// all sequences share one length, so a single check and a product suffice
			if( length < len )
			{
				throw new WrongLengthException( len );
			}
			return (length - len + 1) * seqs.length;
		}

		int total = 0;
		for( Sequence seq : seqs )
		{
			int current = seq.getLength();
			if( current < len )
			{
				throw new WrongLengthException( len );
			}
			total += current - len + 1;
		}
		return total;
	}

	/**
	 * This method enables you to use only a suffix of all elements in the current sample. The subsequences will be
	 * returned in a new sample.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If the sample has a fixed element length, the new sample has the element length <code>length - start</code>;
	 * otherwise its element length is 0. For a simple AlphabetContainer the container is reused, otherwise the
	 * corresponding sub-container is requested.
	 * 
	 * @param start
	 *            the start position of the suffix
	 * 
	 * @return a sample of specified suffixes
	 * 
	 * @throws IllegalArgumentException
	 *             if start is not suitable
	 */
	public final Sample getSuffixSample( int start ) throws IllegalArgumentException
	{
		int l = 0;
		if( length != 0 )
		{
			l = length - start;
		}
		AlphabetContainer abc;
		if( alphabetContainer.isSimple() )
		{
			abc = alphabetContainer;
		}
		else
		{
			// the "else" was missing, so the simple case was always overwritten by the sub-container
			abc = alphabetContainer.getSubContainer( start, l );
		}
		Sequence[] n = new Sequence[getNumberOfElements()];
		ElementEnumerator ei = new ElementEnumerator( this );
		for( int i = 0; i < n.length; i++ )
		{
			n[i] = ei.nextElement().getSubSequence( abc, start );
		}
		try
		{
			return new Sample( abc, n, l, "suffix sample (start=" + start + ") of " + annotation );
		}
		catch( EmptySampleException doesNotHappen )
		{
			// since the current sample is not empty, a sample of suffixes can't be empty
			return null;
		}
	}

	/**
	 * This method answers the question whether all random variables are defined over the same range, i.e. all
	 * positions use the same (fixed) alphabet. The answer is taken from the AlphabetContainer.
	 * 
	 * @return <code>true</code> if the sample is simple
	 */
	public final boolean isSimpleSample()
	{
		return getAlphabetContainer().isSimple();
	}

	/**
	 * This method returns <code>true</code> if all positions use discrete values. The answer is taken from the
	 * AlphabetContainer.
	 * 
	 * @return <code>true</code> if the sample is discrete
	 */
	public final boolean isDiscreteSample()
	{
		return getAlphabetContainer().isDiscrete();
	}

	/**
	 * This method partitions the elements of the sample in <code>2</code> distinct parts. The second part (test
	 * sample) holds the percentage of <code>p</code>, the first the rest (train sample). The first part has the same
	 * element length as the current sample, the second gets the element length <code>subsequenceLength</code>.
	 * 
	 * @param p
	 *            the percentage for the second part, the second part holds at least this percentage of the full sample
	 * @param method
	 *            the method how to partition the sample (partitioning criterion)
	 * @param subsequenceLength
	 *            the element length of the second part. If <code>subsequenceLength</code> is 0 (zero) then the
	 *            sequences are used as given in this Sample.
	 *            
	 * @return the partitioned sample
	 * 
	 * @throws WrongLengthException
	 *             if something is wrong with <code>subsequenceLength</code>
	 * @throws UnsupportedOperationException
	 *             if the sample is not simple and <code>subsequenceLength</code> differs from the element length
	 * @throws EmptySampleException
	 *             if at least one of the created partitions is empty
	 * 
	 * @see Sample.PartitionMethod#PARTITION_BY_NUMBER_OF_ELEMENTS
	 * @see Sample.PartitionMethod#PARTITION_BY_NUMBER_OF_SYMBOLS
	 */
	public Sample[] partition( double p, PartitionMethod method, int subsequenceLength ) throws WrongLengthException,
			UnsupportedOperationException, EmptySampleException
	{
		boolean lengthChanges = length != subsequenceLength;
		if( lengthChanges && !isSimpleSample() )
		{
			throw new UnsupportedOperationException( "The is method can only be used for simple samples." );
		}
		// first part: the rest (train), second part: the percentage p (test)
		Sample[] parts = partition( method, 1d - p, p );
		Sample testPart = parts[1];
		testPart.setSubsequenceLength( subsequenceLength );
		return parts;
	}

	/**
	 * This method partitions the elements of the sample in distinct parts.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If <code>percentage</code> is <code>null</code> or contains at most one value, no partitioning is done and
	 * an array containing only this sample is returned. Otherwise the requested size of each part is the
	 * corresponding percentage of the total number of elements or symbols (depending on <code>method</code>),
	 * rounded up.
	 * 
	 * @param method
	 *            the method how to partition the sample (partitioning criterion)
	 * @param percentage
	 *            the array of percentage for each &quot;subsample&quot;
	 * 
	 * @return the array of &quot;subsamples&quot;
	 * 
	 * @throws IllegalArgumentException
	 *             if something with the percentages is not correct (sum != 1 up to a small numerical tolerance, or
	 *             one value not in [0,1]) or if the partitioning criterion is unknown
	 * @throws EmptySampleException
	 *             if at least one of the created partitions is empty
	 * 
	 * @see Sample.PartitionMethod#PARTITION_BY_NUMBER_OF_ELEMENTS
	 * @see Sample.PartitionMethod#PARTITION_BY_NUMBER_OF_SYMBOLS
	 */
	public Sample[] partition( PartitionMethod method, double... percentage ) throws IllegalArgumentException, EmptySampleException
	{
		// the short-circuit operator is essential: a null array must not be dereferenced
		if( percentage == null || percentage.length <= 1 )
		{
			return new Sample[]{ this };
		}
		int i = 0;
		double sum = 0;
		for( ; i < percentage.length; i++ )
		{
			if( 0 > percentage[i] || 1 < percentage[i] )
			{
				throw new IllegalArgumentException( "The value of percentage[" + i + "] is not in [0,1]." );
			}
			sum += percentage[i];
		}
		// Compare with a tolerance, since e.g. 0.7 + 0.2 + 0.1 is not exactly 1 in floating point arithmetic.
		// The negated form makes sure that a NaN sum is still rejected.
		final double tolerance = 1E-10;
		if( !( Math.abs( sum - 1d ) <= tolerance ) )
		{
			throw new IllegalArgumentException( "The sum of the percentages is not 1. (sum = " + sum + ")" );
		}
		long[] anz = new long[percentage.length];
		long l;
		switch( method )
		{
			case PARTITION_BY_NUMBER_OF_ELEMENTS:
				l = getNumberOfElements();
				break;
			case PARTITION_BY_NUMBER_OF_SYMBOLS:
				// count all symbols of all elements
				l = 0;
				ElementEnumerator ei = new ElementEnumerator( this );
				while( ei.hasMoreElements() )
				{
					l += ei.nextElement().getLength();
				}
				break;
			default:
				throw new IllegalArgumentException( "The partitioning criterion is unknown." );
		}

		// requested size of each part, rounded up
		long sumAnz = 0;
		for( i = 0; i < anz.length; i++ )
		{
			anz[i] = (long) Math.ceil( l * percentage[i] );
			sumAnz += anz[i];
		}
		// if rounding errors left the requested sizes short of the total, hand out the missing units one by one,
		// starting with the last part
		i = anz.length-1;
		while( sumAnz < l )
		{
			anz[i]++;
			sumAnz++;
			i--;
			if( i < 0 )
			{
				i = anz.length-1;
			}
		}
		return partition( anz, method );
	}

	/**
	 * Splits the elements of this sample randomly into <code>k</code> distinct parts of (nearly) equal size. The
	 * size is measured in elements or in symbols, depending on <code>method</code>. If the total is not divisible
	 * by <code>k</code>, the requested sizes of the last parts are increased by one until the remainder is used up.
	 * 
	 * @param k
	 *            the number of parts
	 * @param method
	 * 			  how to split the data
	 * 
	 * @throws IllegalArgumentException
	 *             if <code>k</code> is smaller than 1 or if the partitioning criterion is unknown
	 * @throws EmptySampleException
	 *             if at least one of the created partitions is empty
	 * 
	 * @return the array of &quot;subsamples&quot;; for <code>k == 1</code> an array containing only this sample
	 * 
	 * @see Sample.PartitionMethod#PARTITION_BY_NUMBER_OF_ELEMENTS
	 * @see Sample.PartitionMethod#PARTITION_BY_NUMBER_OF_SYMBOLS
	 */
	public Sample[] partition( int k, PartitionMethod method ) throws IllegalArgumentException, EmptySampleException
	{
		if( k < 1 )
		{
			throw new IllegalArgumentException( "Can't partition in " + k + " parts." );
		}
		if( k == 1 )
		{
			// nothing to split
			return new Sample[]{ this };
		}

		long[] sizes = new long[k];
		long total;
		switch( method )
		{
			case PARTITION_BY_NUMBER_OF_ELEMENTS:
				total = getNumberOfElements();
				break;
			case PARTITION_BY_NUMBER_OF_SYMBOLS:
				// sum up the lengths of all elements
				total = 0;
				for( ElementEnumerator elements = new ElementEnumerator( this ); elements.hasMoreElements(); )
				{
					total += elements.nextElement().getLength();
				}
				break;
			default:
				throw new IllegalArgumentException( "The partitioning criterion is unknown." );
		}

		// every part gets the same base size ...
		long remainder = total % k;
		Arrays.fill( sizes, total / k );
		// ... and the remainder is spread over the last parts
		for( int idx = k - 1; remainder > 0; idx--, remainder-- )
		{
			sizes[idx]++;
		}

		return partition( sizes, method );
	}

	/**
	 * Randomly distributes the elements of this sample to <code>anz.length</code> parts.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The parts are filled from the last to the second one by drawing elements without replacement. Part
	 * <code>0</code> receives all elements that have not been drawn, therefore <code>anz[0]</code> is never read.
	 * 
	 * @param anz
	 *            the requested size of each part, measured in elements or in symbols depending on
	 *            <code>method</code>
	 * @param method
	 *            the partitioning criterion
	 * 
	 * @return the array of &quot;subsamples&quot;
	 * 
	 * @throws EmptySampleException
	 *             if the elements are used up before all parts could be filled (so that at least part
	 *             <code>0</code> would be empty) or if creating one of the parts fails because it is empty
	 * @throws IllegalArgumentException
	 *             if the partitioning criterion is unknown
	 */
	private Sample[] partition( long[] anz, PartitionMethod method ) throws EmptySampleException
	{
		Sample[] parts;
		// pos holds the element indices; the not yet drawn indices are always stored in pos[0..last-1],
		// ends[i] is the (exclusive) end of the block of part i in pos
		int[] pos = new int[getNumberOfElements()], ends = new int[anz.length];
		int last = pos.length, drawn, help, i = 0;
		for( i = 0; i < last; i++ )
		{
			pos[i] = i;
		}
		Random r = new Random();

		// for i = 0 it is not necessary to draw
		switch( method )
		{
			case PARTITION_BY_NUMBER_OF_ELEMENTS:
				int j;
				for( i = anz.length - 1; i > 0; i-- )
				{
					ends[i] = last;
					for( j = 0; j < anz[i]; j++ )
					{
						if( last == 0 )
						{
							// no element is left, i.e. at least part 0 would be empty;
							// without this check Random.nextInt( 0 ) would fail with an unrelated exception
							throw new EmptySampleException();
						}
						// move the drawn index to the end of the not yet drawn region
						drawn = r.nextInt( last );
						help = pos[drawn];
						pos[drawn] = pos[--last];
						pos[last] = help;
					}
				}
				break;
			case PARTITION_BY_NUMBER_OF_SYMBOLS:
				long l;
				for( i = anz.length - 1; i > 0; i-- )
				{
					ends[i] = last;
					l = 0;
					// draw whole elements until the requested number of symbols is reached
					while( l < anz[i] )
					{
						if( last == 0 )
						{
							// no element is left, i.e. at least part 0 would be empty;
							// without this check Random.nextInt( 0 ) would fail with an unrelated exception
							throw new EmptySampleException();
						}
						drawn = r.nextInt( last );
						help = pos[drawn];
						pos[drawn] = pos[--last];
						pos[last] = help;
						l += getElementAt( help ).getLength();
					}
				}
				break;
			default:
				throw new IllegalArgumentException( "The partitioning criterion is unknown." );
		}
		// here i is 0: part 0 gets all elements that have not been drawn
		ends[i] = last;
		Sequence[][] seqs = getPartitionsOfElements( pos, ends );
		parts = new Sample[seqs.length];
		for( i = 0; i < anz.length; i++ )
		{
			parts[i] = new Sample( alphabetContainer, seqs[i], length, "partition of " + annotation );
		}
		return parts;
	}

	/**
	 * Draws <code>number</code> elements (sequences) at random and with replacement from the elements of this
	 * <code>Sample</code>. <br>
	 * The elements are obtained via <code>getElementAt</code>, i.e. depending on whether this Sample is chosen to
	 * contain overlapping elements (windows of length subsequenceLength) or not, those elements (overlapping
	 * windows, whole sequences) are subsampled.
	 * 
	 * @param number
	 *            the number of Sequences that should be drawn from the contained set of sequences (with replacement)
	 *            
	 * @return a new Sample containing the drawn Sequences
	 * 
	 * @throws EmptySampleException if number is not positive
	 */
	public Sample subSampling( int number ) throws EmptySampleException
	{
		if( number <= 0 )
		{
			throw new EmptySampleException();
		}

		final Random generator = new Random();
		final Sequence[] chosen = new Sequence[number];
		final int available = getNumberOfElements();

		// each draw is independent, so an element may be chosen several times
		int filled = 0;
		while( filled < number )
		{
			chosen[filled] = getElementAt( generator.nextInt( available ) );
			filled++;
		}

		return new Sample( alphabetContainer, chosen, length, "subsample of " + annotation );
	}

	/**
	 * This method writes a message <code>msg</code> and the sample to a file <code>f</code>.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If the message or the annotation of the sample is not empty, it is written as a line starting with
	 * &quot;# &quot; respectively &quot;# annotation: &quot;. After that each element of the sample is written in
	 * its own line. The writer is closed in any case, even if writing fails.
	 * 
	 * @param msg the message, any information; may be <code>null</code>
	 * @param f the File
	 * 
	 * @throws IOException if something went wrong with the file
	 */
	public final void save( String msg, File f ) throws IOException
	{
		BufferedWriter b = new BufferedWriter( new FileWriter( f ) );
		try
		{
			ElementEnumerator ei = new ElementEnumerator( this );
			if( msg != null && !msg.trim().equals( "" ) )
			{
				b.write( "# " + msg );
				b.newLine();
			}
			if( annotation != null && !annotation.trim().equals( "" ) )
			{
				b.write( "# annotation: " + annotation );
				b.newLine();
			}
			// the first element is written without a leading line break, so the file does not end with an
			// empty line
			b.write( ei.nextElement().toString() );
			while( ei.hasMoreElements() )
			{
				b.newLine();
				b.write( ei.nextElement().toString() );
			}
		}
		finally
		{
			// release the file handle even if one of the write operations failed
			b.close();
		}
	}

	/**
	 * Returns a textual representation of this sample. It consists of a header (annotation, alphabet container,
	 * element length and number of elements), where each header line is prefixed by &quot;# &quot;, followed by
	 * all elements of the sample, one per line.
	 * 
	 * @return the textual representation of this sample
	 */
	public String toString()
	{
		int l = getNumberOfElements();
		StringBuffer erg = new StringBuffer( l * Math.max( getElementLength(), 10 ) );

		// assemble the header first ...
		String header = "annotation       : " + annotation + "\n\n"
				+ "AlphabetContainer:\n" + alphabetContainer + "\n"
				+ "element length   : " + getElementLength() + "\n"
				+ "of elements    : " + l + "\n\nsequences:\n";

		// ... and turn each of its lines into a comment line
		erg.append( "# " );
		erg.append( header.replace( "\n", "\n# " ) );
		erg.append( "\n" );

		// the elements follow, one per line
		for( ElementEnumerator ei = new ElementEnumerator( this ); ei.hasMoreElements(); )
		{
			erg.append( ei.nextElement() + "\n" );
		}
		return erg.toString();
	}

	/**
	 * Finds the index of the sequence that contains the subsequence (window) with the overall index
	 * <code>overAllIndex</code>. This is done by a binary search on <code>indexOfFirstSubseq</code>, i.e. in
	 * O(log seqs.length).
	 * 
	 * <br>
	 * <br>
	 * 
	 * <code>indexOfFirstSubseq[i]</code> is read as the number of all windows of the sequences
	 * <code>0,...,i</code>, so the last entry is the total number of windows and the valid overall indices are
	 * <code>0,...,indexOfFirstSubseq[seqs.length - 1] - 1</code>.
	 * 
	 * @param overAllIndex
	 *            the index of the window with respect to all windows of the sample
	 * 
	 * @return the index of the sequence in <code>seqs</code> that contains the window
	 * 
	 * @throws IndexOutOfBoundsException
	 *             if <code>overAllIndex</code> is negative or not smaller than the total number of windows
	 */
	private int getIndexOfSeq( int overAllIndex ) throws IndexOutOfBoundsException
	{
		// the total number of windows itself is not a valid index
		if( overAllIndex < 0 || overAllIndex >= indexOfFirstSubseq[seqs.length - 1] )
		{
			throw new IndexOutOfBoundsException();
		}
		int lower = 0, sep, upper = seqs.length - 1;
		if( overAllIndex < indexOfFirstSubseq[lower] )
		{
			// the window belongs to the first sequence
			return 0;
		}
		else
		{
			// invariant: indexOfFirstSubseq[lower] <= overAllIndex < indexOfFirstSubseq[upper]
			do
			{
				sep = (upper + lower) / 2;
				if( overAllIndex < indexOfFirstSubseq[sep] )
				{
					upper = sep;
				}
				else
				{
					lower = sep;
				}
			}
			while( upper - lower > 1 );
			return lower + 1;
		}
	}

	/**
	 * Distributes the elements of this sample to <code>ends.length</code> arrays of sequences.
	 * 
	 * <br>
	 * <br>
	 * 
	 * The element with index <code>pos[i]</code> is assigned to part <code>j</code> if
	 * <code>ends[j-1] &lt;= i &lt; ends[j]</code> (with <code>ends[-1] = 0</code>). Parts may be empty, i.e.
	 * consecutive entries of <code>ends</code> may be equal. Inside each part the elements keep the order they have
	 * in this sample.
	 * 
	 * @param pos
	 *            the indices of the elements, grouped by part (presumably a permutation of all element indices)
	 * @param ends
	 *            the (exclusive) end of each part in <code>pos</code>, in non-decreasing order; the last entry is
	 *            expected to be <code>pos.length</code>
	 * 
	 * @return the sequences of each part
	 */
	private Sequence[][] getPartitionsOfElements( int[] pos, int[] ends )
	{
		int i = 0, j = 0;
		// part[e] is the index of the part the element e belongs to
		int[] part = new int[pos.length];
		for( ; i < pos.length; i++ )
		{
			// skip all parts that end here; a loop is needed since empty parts end at the same position as
			// their predecessor
			while( j < ends.length - 1 && i == ends[j] )
			{
				j++;
			}
			part[pos[i]] = j;
		}

		// index[p] is the number of sequences that have already been stored for part p
		int[] index = new int[ends.length];
		ElementEnumerator ei = new ElementEnumerator( this );
		Sequence[][] seqs = new Sequence[ends.length][];
		j = 0;
		for( i = 0; i < seqs.length; i++ )
		{
			seqs[i] = new Sequence[ends[i] - j];
			j = ends[i];
		}
		// enumerate the elements in their original order and append each one to its part
		for( i = 0; ei.hasMoreElements(); i++ )
		{
			seqs[part[i]][index[part[i]]++] = ei.nextElement();
		}

		return seqs;
	}

	/**
	 * This method computes the indices if one wants to use subsequences of length <code>len</code>. If
	 * <code>len</code> is 0 (zero) or equal to the current element length, nothing is changed and the sequences are
	 * used as given.
	 * 
	 * <br>
	 * <br>
	 * 
	 * Otherwise <code>indexOfFirstSubseq[i]</code> is set to the number of all subsequences (windows) of length
	 * <code>len</code> in the sequences <code>0,...,i</code> and the element length is set to <code>len</code>.
	 * The sample is only modified if the complete index could be computed, i.e. if an exception is thrown the
	 * sample is left unchanged.
	 * 
	 * @param len
	 *            the subsequence length
	 * 
	 * @throws WrongLengthException
	 *             if the subsequence length is not supported, i.e. if it is negative or longer than one of the
	 *             sequences
	 * @throws UnsupportedOperationException
	 *             if a subsequence index has already been set for this sample or if the sample is not simple
	 */
	private void setSubsequenceLength( int len ) throws WrongLengthException
	{
		if( len < 0 )
		{
			throw new WrongLengthException( len );
		}
		if( length == len || len == 0 )
		{
			// either the requested length is already used or the sequences shall be used as given
			return;
		}
		if( indexOfFirstSubseq != null )
		{
			throw new UnsupportedOperationException( "operation not supported since indexOfFirstSubseq != null" );
		}
		if( !isSimpleSample() )
		{
			throw new UnsupportedOperationException(
					"For this sample it is impossible to have a sliding window, since the AlphabetContainer is not simple." );
		}

		// the index is built in a local array and only published after all sequences have been checked,
		// so that a WrongLengthException does not leave a half-filled index behind
		int[] index = new int[seqs.length];
		if( length == 0 )
		{
			// the sequences have different lengths, so the number of windows has to be computed for each one
			int l, all = 0;
			for( int i = 0; i < seqs.length; i++ )
			{
				l = seqs[i].getLength();
				if( l < len )
				{
					throw new WrongLengthException( len );
				}
				all += l - len + 1;
				index[i] = all;
			}
		}
		else
		{
			// all sequences have the same length and therefore the same number of windows
			if( length < len )
			{
				throw new WrongLengthException( len );
			}
			for( int i = 0, offset = length - len + 1, all = offset; i < seqs.length; i++, all += offset )
			{
				index[i] = all;
			}
		}
		indexOfFirstSubseq = index;
		length = len;
	}

	/**
	 * <a name="ElementEnumerator"></a>This class can be used to have a fast sequential access to a sample. It
	 * enumerates all elements of a sample.
	 * 
	 * <br>
	 * <br>
	 * 
	 * If the sample has no subsequence index (<code>indexOfFirstSubseq == null</code>), the stored sequences are
	 * returned one after another. Otherwise the subsequences (windows) of the stored sequences are returned,
	 * sequence by sequence and from the first to the last start position.
	 * 
	 * <br>
	 * <br>
	 * 
	 * As further functionality the method reset() is implemented to reuse an ElementEnumerator.
	 * 
	 * @author Jens Keilwagen
	 */
	public static class ElementEnumerator implements Enumeration<Sequence>
	{
		// index of the current sequence and start position of the next window inside this sequence
		private int seqCounter, startPosCounter;

		private final Sample s;

		/**
		 * This constructor creates a new ElementEnumerator on the given <code>data</code>
		 * 
		 * @param data the sample
		 */
		public ElementEnumerator( Sample data )
		{
			s = data;
			reset();
		}

		/**
		 * Tests whether at least one element has not been enumerated yet.
		 * 
		 * @return <code>true</code> if there is at least one more element
		 */
		public boolean hasMoreElements()
		{
			return seqCounter < s.seqs.length;
		}

		/**
		 * Returns the next element of the sample.
		 * 
		 * @return the next element of the sample
		 * 
		 * @throws java.util.NoSuchElementException if all elements have already been enumerated
		 */
		public Sequence nextElement()
		{
			if( seqCounter >= s.seqs.length )
			{
				// required by the contract of Enumeration
				throw new java.util.NoSuchElementException( "The sample has no more elements." );
			}
			if( s.indexOfFirstSubseq == null )
			{
				return s.seqs[seqCounter++];
			}
			else
			{
				Sequence current;
				if( s.length != 0 )
				{
					current = s.seqs[seqCounter].getSubSequence( startPosCounter, s.length );
				}
				else
				{
					current = s.seqs[seqCounter].getSubSequence( startPosCounter );
				}
				// switch to the next sequence if no further window fits into the current one
				if( ++startPosCounter + s.length > s.seqs[seqCounter].getLength() )
				{
					seqCounter++;
					startPosCounter = 0;
				}
				return current;
			}
		}

		/**
		 * Restarts the enumeration.
		 */
		public void reset()
		{
			seqCounter = startPosCounter = 0;
		}
	}
	
	/**
	 * This class enables you to eliminate sequences that occur more than once in one or more samples. The number of
	 * occurrence is given by the weight for a sequence.
	 * 
	 * <br>
	 * <br>
	 * 
	 * Internally all sequences are collected in a hash table that maps each distinct sequence to the sum of the
	 * weights of its occurrences. From this table a sample of the distinct sequences and an array of the
	 * corresponding weights is created.
	 * 
	 * @author Jens Keilwagen
	 */
	public static class WeightedSampleFactory
	{
		/**
		 * This enum defines the different types of sort operation that can be performed while creating a {@link WeightedSampleFactory}.
		 *  
		 * @author Jens Keilwagen
		 */
		public static enum SortOperation {
			/**
			 * This value indicates that no sorting shall be done after eliminating sequences that occur more than once.
			 */
			NO_SORT,
		
			/**
			 * This value indicates that the sequences shall be sorted after eliminating sequences that occur more than once.
			 * Probably this is the slowest option.
			 */
			SORT_BY_SEQUENCE,
			
			/**
			 * This value indicates that the sequences shall be sorted according to their weights after eliminating sequences
			 * that occur more than once.
			 */
			SORT_BY_WEIGHTS;
		}
		
		// the sample of distinct sequences
		private Sample res;

		// weights[i] is the summed weight of the sequence with index i in res
		private double[] weights;

		/**
		 * This constructor creates a {@link WeightedSampleFactory} on the given {@link Sample}(s). Each occurrence
		 * of a sequence gets the weight <code>1</code>.
		 *  
		 * @param sort the SortOperation
		 * @param data the Sample(s)
		 * 
		 * @throws WrongAlphabetException if the alphabets of the samples do not match
		 * @throws WrongLengthException does not happen
		 * @throws IllegalArgumentException if no sample is given
		 */
		public WeightedSampleFactory( SortOperation sort, Sample... data ) throws WrongAlphabetException, WrongLengthException
		{
			this( sort, data, null, 0 );
		}
		
		/**
		 * This constructor creates a {@link WeightedSampleFactory} on the given {@link Sample} and <code>weights</code>.
		 *  
		 * @param sort the SortOperation
		 * @param data the Sample
		 * @param weights the weights for each element in the Sample; if <code>null</code>, each element gets the
		 *            weight <code>1</code>
		 * 
		 * @throws WrongAlphabetException if the alphabets of the samples do not match
		 * @throws WrongLengthException does not happen
		 */
		public WeightedSampleFactory( SortOperation sort, Sample data, double[] weights ) throws WrongAlphabetException, WrongLengthException
		{
			this( sort, new Sample[]{ data }, new double[][]{ weights }, 0 );
		}

		/**
		 * This constructor creates a {@link WeightedSampleFactory} on the given {@link Sample} and <code>weights</code>.
		 *  
		 * @param sort the SortOperation
		 * @param data the Sample
		 * @param weights the weight for each element in the Sample; if <code>null</code>, each element gets the
		 *            weight <code>1</code>
		 * @param length the length of the elements in the resulting WeightedSampleFactory; if <code>0</code>, the
		 *            elements are used as given
		 * 
		 * @throws WrongAlphabetException if the alphabets of the samples do not match
		 * @throws WrongLengthException if the length is not supported
		 */
		public WeightedSampleFactory( SortOperation sort, Sample data, double[] weights, int length ) throws WrongAlphabetException, WrongLengthException
		{
			this( sort, new Sample[]{data}, new double[][]{weights}, length );
		}

		/**
		 * This constructor creates a {@link WeightedSampleFactory} on the given array of {@link Sample}s and <code>weights</code>.
		 *  
		 * @param sort the SortOperation
		 * @param data the Samples
		 * @param weights the weights for each element in each Sample; if <code>null</code>, each element gets the
		 *            weight <code>1</code>
		 * @param length the length of the elements in the resulting WeightedSampleFactory; if <code>0</code>, the
		 *            elements are used as given, otherwise all subsequences of this length are used
		 * 
		 * @throws WrongAlphabetException if the alphabets of the samples do not match
		 * @throws WrongLengthException if the length is not supported
		 * @throws IllegalArgumentException if <code>data</code> is <code>null</code> or empty
		 */
		public WeightedSampleFactory( SortOperation sort, Sample[] data, double[][] weights, int length ) throws WrongAlphabetException, WrongLengthException
		{
			// the first sample is used as reference below, so at least one sample is required
			if( data == null || data.length == 0 )
			{
				throw new IllegalArgumentException( "At least one Sample has to be given." );
			}
			Hashtable<Sequence,double[]> ht = new Hashtable<Sequence,double[]>( data.length * data[0].getNumberOfElements() ); 
			for( int i = 0; i < data.length; i++ )
			{
				if( data[0].alphabetContainer.checkConsistency(data[i].alphabetContainer) )
				{
					if( weights != null )
					{
						add( ht, data[i], weights[i], length );
					}
					else
					{
						add( ht, data[i], null, length );
					}
				}
				else
				{
					throw new WrongAlphabetException( "The AlphabetContainer for all Sample has to be consistent." );
				}
			}
			create( "all sequences" + ( length>0 ? (" of length " + length) : "" ) + " that occur in "+ Sample.getAnnotation(data), sort, ht );
		}
		
		/**
		 * Adds all elements of <code>data</code> to the hash table. If <code>length</code> is <code>0</code>, the
		 * elements are added as given, otherwise all subsequences of length <code>length</code> of each element are
		 * added, each one with the weight of the element.
		 * 
		 * @param ht the hash table that maps the sequences to their summed weight
		 * @param data the sample
		 * @param weights the weight for each element of the sample or <code>null</code> for weight <code>1</code>
		 * @param length the length of the subsequences or <code>0</code>
		 * 
		 * @throws WrongLengthException if an element is shorter than <code>length</code>
		 */
		private void add( Hashtable<Sequence,double[]> ht, Sample data, double[] weights, int length ) throws WrongLengthException
		{
			Sequence s;
			double w = 1;
			int i = 0, anz = data.getNumberOfElements(), j, l;
			for( ; i < anz; i++ )
			{
				s = data.getElementAt(i);
				if( weights != null )
				{
					w = weights[i];
				}
				if( length == 0 )
				{
					put( ht, s, w );
				}
				else
				{
					// l is the number of subsequences of the requested length in s
					l = s.getLength() - length + 1;
					if( l > 0 )
					{
						for( j = 0; j < l; j++ )
						{
							put( ht, s.getSubSequence(s.alphabetCon,j,length), w );
						}
					}
					else
					{
						throw new WrongLengthException( length );
					}
				}
			}
		}
		
		/**
		 * Adds the weight <code>w</code> to the entry of the sequence <code>s</code>. If the sequence is not
		 * contained in the hash table, a new entry is created.
		 * 
		 * @param ht the hash table that maps the sequences to their summed weight
		 * @param s the sequence
		 * @param w the weight of this occurrence of the sequence
		 */
		private void put( Hashtable<Sequence,double[]> ht, Sequence s, double w )
		{
			// the weight is stored in an array of length 1, so it can be changed in place
			double[] value = ht.get(s);
			if( value != null )
			{
				value[0] += w;
			}
			else
			{
				ht.put(s, new double[]{w} );
			}
		}
		
		/**
		 * Creates the internal sample and the weights from the entries of the hash table.
		 * 
		 * @param annotation the annotation of the internal sample
		 * @param sort the SortOperation that is applied to the entries
		 * @param ht the hash table that maps the sequences to their summed weight
		 * 
		 * @throws IllegalArgumentException if the sort operation is unknown
		 */
		@SuppressWarnings("unchecked")
		private void create( String annotation, SortOperation sort, Hashtable<Sequence,double[]> ht )
		{		
			Entry<Sequence,double[]>[] array = ht.entrySet().toArray( new Entry[0] );
			switch( sort )
			{
				case NO_SORT:
					break;
				case SORT_BY_SEQUENCE:
					Arrays.sort( array, SequenceComparator.DEFAULT );
					break;
				case SORT_BY_WEIGHTS:
					Arrays.sort( array, WeightsComparator.DEFAULT );
					break;
				default: throw new IllegalArgumentException( "unknown sort operation" );
			}
			
			Sequence[] seqs = new Sequence[array.length];
			weights = new double[array.length];
			Entry<Sequence,double[]> e;
			for( int i = 0; i < weights.length; i++ )
			{
				e = array[i];
				seqs[i] = e.getKey();
				weights[i] = e.getValue()[0];
			}
			try
			{
				res = new Sample( annotation, seqs );
			}
			catch( Exception doesNotHappen )
			{
				// keep the original exception as cause, so that no information gets lost
				throw new RuntimeException( doesNotHappen.getMessage(), doesNotHappen );
			}
		}
		
		/**
		 * Returns the sequence with index <code>index</code>.
		 * 
		 * @param index the index of the sequence
		 * 
		 * @return the sequence with index <code>index</code>
		 */
		public Sequence getElementAt( int index )
		{
			return res.getElementAt( index );
		}

		/**
		 * Returns the number of elements in the internal Sample.
		 * 
		 * @return the number of elements in the internal Sample
		 */
		public int getNumberOfElements()
		{
			return res.getNumberOfElements();
		}

		/**
		 * Returns the sample, where each sequence occurs only once
		 * 
		 * @return the sample, where each sequence occurs only once
		 */
		public Sample getSample()
		{
			return res;
		}

		/**
		 * Returns the sum of all weights
		 * 
		 * @return the sum of all weights
		 */
		public double getSumOfWeights()
		{
			double res = 0;
			for( int i = 0; i < weights.length; i++ )
			{
				res += weights[i];
			}
			return res;
		}

		/**
		 * Returns the weight for the sequence with index <code>index</code>.
		 * 
		 * @param index the index of the sequence
		 * 
		 * @return the weight for the sequence with index <code>index</code>
		 */
		public double getWeight( int index )
		{
			return weights[index];
		}

		/**
		 * Returns a copy of the weights for the sample.
		 * 
		 * @see Sample.WeightedSampleFactory#getSample()
		 * 
		 * @return the weights for the sample
		 */
		public double[] getWeights()
		{
			return weights.clone();
		}

		/**
		 * Returns a textual representation with one line per sequence, consisting of the index, the sequence and
		 * its weight.
		 * 
		 * @return the textual representation
		 */
		public String toString()
		{
			StringBuffer sb = new StringBuffer( (10 + res.getElementLength() ) * weights.length ); 
			for( int i = 0; i < weights.length; i++)
			{
				sb.append( i + " " + res.getElementAt(i) + "\t" + weights[i] + "\n" );
			}
			return sb.toString();
		}
		
		/**
		 * This comparator sorts the entries by their weights in descending order.
		 */
		private static final class WeightsComparator implements Comparator<Entry<Sequence,double[]>>
		{
			/**
			 * This constant hold the only instance of the class.
			 */
			public static final WeightsComparator DEFAULT = new WeightsComparator();
			
			private WeightsComparator(){}
			
			public int compare( Entry<Sequence, double[]> o1, Entry<Sequence, double[]> o2 )
			{
				// the arguments are swapped to get a descending order; Double.compare defines a total order
				// (also for NaN), whereas the sign of a difference does not
				return Double.compare( o2.getValue()[0], o1.getValue()[0] );
			}
		}
		
		/**
		 * This comparator sorts the entries by their sequences using the natural order of the sequences.
		 */
		private static final class SequenceComparator implements Comparator<Entry<Sequence,double[]>>
		{
			/**
			 * This constant hold the only instance of the class.
			 */
			public static final SequenceComparator DEFAULT = new SequenceComparator();
			
			private SequenceComparator(){}
			
			public int compare( Entry<Sequence, double[]> o1, Entry<Sequence, double[]> o2 )
			{
				return o1.getKey().compareTo( o2.getKey() );
			}
		}
	}
}
