/*
 * This file is part of Jstacs.
 * 
 * Jstacs is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 * 
 * Jstacs is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * Jstacs. If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.data.alphabets;

import java.util.Hashtable;
import java.util.StringTokenizer;

import de.jstacs.DataType;
import de.jstacs.NonParsableException;
import de.jstacs.WrongAlphabetException;
import de.jstacs.data.Alphabet;
import de.jstacs.io.XMLParser;
import de.jstacs.parameters.CollectionParameter;
import de.jstacs.parameters.SimpleParameter;

/**
 * Class for an alphabet that consists of arbitrary {@link String}s. For DNA
 * alphabets, the class {@link DNAAlphabet} should be used.
 * 
 * <p>
 * Each symbol is encoded by its index in the symbol array that was used to
 * create the alphabet. If the alphabet is case insensitive, symbols are
 * converted to upper case before they are looked up.
 * </p>
 * 
 * @author Jan Grau, Jens Keilwagen
 */
public class DiscreteAlphabet extends Alphabet {

	/**
	 * The tag that encloses the XML representation of this class.
	 */
	private static final String XML_TAG = "DiscreteAlphabet";

	/**
	 * The alphabet as {@link String} array; the index of a symbol is its code.
	 */
	private String[] alphabet;

	/**
	 * For encoding: maps the lookup key of a symbol (see
	 * {@link #toKey(String, boolean)}) to its code.
	 */
	private Hashtable<String, Integer> hash;

	/**
	 * <code>true</code> if lowercase and uppercase symbols are not
	 * distinguished.
	 */
	private boolean caseInsensitive;

	/**
	 * The length of the longest symbol of the alphabet.
	 */
	private int longestCharacter;

	/**
	 * The {@link de.jstacs.parameters.ParameterSet} describing this
	 * {@link DiscreteAlphabet}. It is only set by the constructor that takes a
	 * {@link DiscreteAlphabetParameterSet}; otherwise it stays
	 * <code>null</code>.
	 */
	protected AlphabetParameterSet parameters;

	/**
	 * Extracts the alphabet from the {@link StringBuffer}.
	 * 
	 * @param representation
	 *            the XML stream
	 * @throws NonParsableException
	 *             if the stream is not parsable
	 */
	public DiscreteAlphabet( StringBuffer representation ) throws NonParsableException {
		StringBuffer xml = XMLParser.extractForTag( representation, XML_TAG );
		try {
			init( XMLParser.extractStringArrayForTag( xml, "symbols" ), XMLParser.extractBooleanForTag( xml, "caseInsensitive" ) );
		} catch ( Exception e ) {
			// every problem while reading or validating the symbols is reported as parsing problem
			NonParsableException n = new NonParsableException( e.getMessage() );
			n.setStackTrace( e.getStackTrace() );
			throw n;
		}
	}

	/**
	 * The constructor for the {@link de.jstacs.InstantiableFromParameterSet}
	 * interface. The value of the first parameter is split at blanks to obtain
	 * the symbols, the value of the second parameter determines whether the
	 * alphabet is case insensitive.
	 * 
	 * @param parameters
	 *            the {@link de.jstacs.parameters.ParameterSet}
	 * 
	 * @throws IllegalArgumentException
	 *             is thrown if space or tab will be used as symbols, or if the
	 *             parameter set could not be cloned
	 * @throws DoubleSymbolException
	 *             is thrown if one of the symbols occurred more than once
	 */
	public DiscreteAlphabet( DiscreteAlphabetParameterSet parameters ) throws IllegalArgumentException, DoubleSymbolException {
		try {
			this.parameters = parameters.clone();
			String alphValue = (String)parameters.getParameterAt( 0 ).getValue();
			boolean caseInsensitive = (Boolean)parameters.getParameterAt( 1 ).getValue();

			StringTokenizer tok = new StringTokenizer( alphValue, " " );
			String[] vals = new String[tok.countTokens()];
			int i = 0;
			while( tok.hasMoreTokens() ) {
				vals[i++] = tok.nextToken();
			}
			init( vals, caseInsensitive );
		} catch ( CloneNotSupportedException e ) {
			// the exception may come without a cause, so the cause must not be dereferenced blindly
			Throwable cause = e.getCause();
			String message = ( cause != null ) ? cause.getMessage() : e.getMessage();
			throw new IllegalArgumentException( message, e );
		}

	}

	/**
	 * Returns a parameter set describing this alphabet. If the alphabet was
	 * created from a parameter set, a clone of that set is returned; otherwise
	 * a new {@link DiscreteAlphabetParameterSet} is created from the symbols
	 * and the case handling of this alphabet.
	 * 
	 * @return the parameter set describing this alphabet
	 * 
	 * @throws Exception
	 *             if the parameter set could not be cloned or created
	 * 
	 * @see de.jstacs.InstantiableFromParameterSet#getCurrentParameterSet()
	 */
	public AlphabetParameterSet getCurrentParameterSet() throws Exception {
		if( parameters != null ) {
			return parameters.clone();
		} else {
			return new DiscreteAlphabetParameterSet( alphabet.clone(), caseInsensitive );
		}
	}

	/**
	 * Returns the key that is used in the internal hash for a symbol.
	 * 
	 * @param symbol
	 *            the symbol
	 * @param caseInsensitive
	 *            if <code>true</code> the symbol is converted to upper case
	 * 
	 * @return the upper case version of <code>symbol</code> if
	 *         <code>caseInsensitive</code> is <code>true</code>, otherwise
	 *         <code>symbol</code> itself
	 */
	private static String toKey( String symbol, boolean caseInsensitive ) {
		return caseInsensitive ? symbol.toUpperCase() : symbol;
	}

	/**
	 * Validates the symbols and fills all internal fields. The given array is
	 * copied, the code of each symbol is its index in the array.
	 * 
	 * @param alphabet
	 *            the symbols of the alphabet
	 * @param caseInsensitive
	 *            if <code>true</code> the case of the symbols is ignored
	 * 
	 * @throws IllegalArgumentException
	 *             if a symbol is empty or contains a blank or a tab
	 * @throws DoubleSymbolException
	 *             if a symbol occurs more than once (with respect to the case
	 *             handling)
	 */
	private void init( String[] alphabet, boolean caseInsensitive ) throws IllegalArgumentException, DoubleSymbolException {
		hash = new Hashtable<String, Integer>( alphabet.length, 1.0f );
		this.alphabet = new String[alphabet.length];
		this.caseInsensitive = caseInsensitive;
		longestCharacter = 0;
		for( int i = 0; i < alphabet.length; i++ ) {
			String symbol = alphabet[i];
			if( symbol.length() == 0 ) {
				throw new IllegalArgumentException( "\"\" can not be a symbol/character." );
			}
			if( symbol.indexOf( " " ) >= 0 || symbol.indexOf( "\t" ) >= 0 ) {
				throw new IllegalArgumentException( "blanks and tabs can not be part of a symbol/character." );
			}
			// compute the lookup key only once per symbol
			String key = toKey( symbol, caseInsensitive );
			if( hash.containsKey( key ) ) {
				throw new DoubleSymbolException( key );
			}
			hash.put( key, i );
			// the symbol is stored as given, only the key is normalized
			this.alphabet[i] = symbol;
			if( longestCharacter < symbol.length() ) {
				longestCharacter = symbol.length();
			}
		}
	}

	/**
	 * Constructor for a discrete alphabet in <code>[min,max]</code>. The
	 * symbols are the decimal representations of the integers from
	 * <code>min</code> to <code>max</code>; the alphabet is case sensitive.
	 * 
	 * @param min
	 *            the minimal value (inclusive)
	 * @param max
	 *            the maximal value (inclusive)
	 * @throws IllegalArgumentException
	 *             is thrown if <code>min</code>&gt;<code>max</code>
	 */
	public DiscreteAlphabet( int min, int max ) throws IllegalArgumentException {
		if( min > max ) {
			throw new IllegalArgumentException( "The maximal value has to be equal or greater than the minimal value." );
		}
		String[] alphabet = new String[max - min + 1];
		for( int i = min; i <= max; i++ ) {
			alphabet[i - min] = "" + i;
		}

		try {
			init( alphabet, false );
		} catch ( Exception doesNotHappen ) {
			// distinct integers yield distinct, non-empty symbols without blanks;
			// if it fails nevertheless, keep the message and the original exception
			throw new IllegalArgumentException( doesNotHappen.getMessage(), doesNotHappen );
		}
	}

	/**
	 * Constructor for a discrete alphabet. The {@link String} array is cloned
	 * internally.
	 * 
	 * @param alphabet
	 *            the alphabet as {@link String} array
	 * @param caseInsensitive
	 *            if <code>true</code> then there will be no difference between
	 *            lowercase and uppercase letters/symbols in the alphabet (no
	 *            case sensitivity)
	 * 
	 * @throws DoubleSymbolException
	 *             is thrown if one of the symbols occurred more than once
	 * @throws IllegalArgumentException
	 *             is thrown if one of the symbols is empty or contains a
	 *             blank or a tab
	 */
	public DiscreteAlphabet( String[] alphabet, boolean caseInsensitive ) throws DoubleSymbolException, IllegalArgumentException {
		init( alphabet, caseInsensitive );
	}

	/**
	 * Returns the XML representation of this alphabet, consisting of the
	 * symbols and the case handling.
	 * 
	 * @return the XML representation
	 * 
	 * @see de.jstacs.Storable#toXML()
	 */
	public StringBuffer toXML() {
		StringBuffer xml = new StringBuffer( 200 );
		XMLParser.appendStringArrayWithTags( xml, alphabet, "symbols" );
		XMLParser.appendBooleanWithTags( xml, caseInsensitive, "caseInsensitive" );
		XMLParser.addTags( xml, XML_TAG );
		return xml;
	}

	/**
	 * Compares this alphabet with another one. Alphabets of different classes
	 * are ordered by their class names. Two {@link DiscreteAlphabet}s are
	 * ordered by the number of symbols (the alphabet with more symbols comes
	 * first), then by the case handling, and finally by the first pair of
	 * symbols that differ. If both alphabets are case insensitive, the symbols
	 * are compared ignoring the case, so that the ordering is consistent with
	 * the notion of equal symbols.
	 * 
	 * @param b
	 *            the alphabet to be compared with
	 * 
	 * @return a negative integer, zero, or a positive integer as this alphabet
	 *         is less than, equal to, or greater than <code>b</code>
	 * 
	 * @see java.lang.Comparable#compareTo(java.lang.Object)
	 */
	public int compareTo( Alphabet b ) {
		if( !getClass().equals( b.getClass() ) ) {
			return getClass().getName().compareTo( b.getClass().getName() );
		}
		if( b == this ) {
			return 0;
		}
		DiscreteAlphabet a = (DiscreteAlphabet)b;
		if( a.alphabet.length != alphabet.length ) {
			return a.alphabet.length - alphabet.length;
		}
		if( a.caseInsensitive != caseInsensitive ) {
			return a.caseInsensitive ? -1 : 1;
		}
		for( int i = 0; i < alphabet.length; i++ ) {
			String other = a.getSymbolAt( i );
			// use the same notion of equality for detecting and for ordering a difference
			int c = caseInsensitive ? alphabet[i].compareToIgnoreCase( other ) : alphabet[i].compareTo( other );
			if( c != 0 ) {
				return c;
			}
		}
		return 0;
	}

	/**
	 * Returns the code of a given symbol.
	 * 
	 * @param symbol
	 *            the symbol
	 * @return the code
	 * 
	 * @throws WrongAlphabetException
	 *             if the symbol is not defined in the alphabet
	 */
	public final int getCode( String symbol ) throws WrongAlphabetException {
		String key = toKey( symbol, caseInsensitive );
		Integer code = hash.get( key );
		if( code == null ) {
			throw new WrongAlphabetException( "Symbol \"" + key + "\" from input not defined in alphabet." );
		}
		return code;
	}

	/**
	 * Returns the length of the longest &quot;symbol&quot; in the alphabet.
	 * 
	 * @return the length of the longest &quot;symbol&quot;
	 */
	public final int getMaximalSymbolLength() {
		return longestCharacter;
	}

	/**
	 * Returns the minimal code of this alphabet, which is always 0.
	 * 
	 * @return 0
	 * 
	 * @see de.jstacs.data.Alphabet#getMin()
	 */
	@Override
	public double getMin() {
		return 0;
	}

	/**
	 * Returns the symbol at position <code>i</code> in the alphabet.
	 * 
	 * @param i
	 *            the position in the alphabet
	 * 
	 * @return the symbol
	 */
	public final String getSymbolAt( int i ) {
		return alphabet[i];
	}

	/**
	 * If this method returns <code>true</code> the alphabet ignores the case.
	 * 
	 * @return <code>true</code> if the alphabet ignores the case
	 */
	public final boolean ignoresCase() {
		return caseInsensitive;
	}

	/**
	 * Returns <code>true</code> if <code>candidate</code> is an element of the
	 * internal interval, i.e. a valid code of a symbol.
	 * 
	 * @param candidate
	 *            the value to be tested
	 * 
	 * @return <code>true</code> if <code>candidate</code> is an element of the
	 *         internal interval
	 */
	public final boolean isEncodedSymbol( int candidate ) {
		return ( 0 <= candidate ) && ( candidate < alphabet.length );
	}

	/**
	 * Tests if a given symbol is contained in the alphabet.
	 * 
	 * @param candidat
	 *            the candidate symbol
	 * 
	 * @return <code>true</code> if the candidate is a symbol of the alphabet
	 */
	public final boolean isSymbol( String candidat ) {
		// the symbols are the KEYS of the hash; Hashtable.contains(Object) would search the values (the codes)
		return hash.containsKey( toKey( candidat, caseInsensitive ) );
	}

	/**
	 * Returns the number of symbols in the calling alphabet.
	 * 
	 * @return the number of symbols
	 */
	@Override
	public final double length() {
		return alphabet.length;
	}

	/**
	 * Returns the symbols of the alphabet, each enclosed in quotation marks,
	 * separated by commas and surrounded by braces, e.g.
	 * <code>{"A", "C"}</code>. An alphabet without symbols yields
	 * <code>{}</code>.
	 * 
	 * @return the {@link String} representation of the alphabet
	 * 
	 * @see de.jstacs.data.Alphabet#toString()
	 */
	@Override
	public String toString() {
		if( alphabet.length == 0 ) {
			return "{}";
		}
		StringBuilder erg = new StringBuilder( "{\"" );
		erg.append( alphabet[0] );
		for( int i = 1; i < alphabet.length; i++ ) {
			erg.append( "\", \"" ).append( alphabet[i] );
		}
		return erg.append( "\"}" ).toString();
	}

	/**
	 * Class for the {@link de.jstacs.parameters.ParameterSet} of a
	 * {@link DiscreteAlphabet}.
	 * 
	 * @author Jan Grau
	 * 
	 */
	public static class DiscreteAlphabetParameterSet extends AlphabetParameterSet {

		/**
		 * Creates a new {@link DiscreteAlphabetParameterSet} with empty values.
		 * 
		 */
		public DiscreteAlphabetParameterSet() {
			super( DiscreteAlphabet.class );
		}

		/**
		 * Creates a new {@link DiscreteAlphabetParameterSet} from an array of
		 * symbols. The symbols are joined by single blanks to form the value
		 * of the first parameter.
		 * 
		 * @param alphabet
		 *            the array of symbols
		 * @param caseInsensitive
		 *            <code>true</code> if the {@link DiscreteAlphabet} shall be
		 *            case insensitive
		 * 
		 * @throws Exception
		 *             is thrown if the parameters could not be created
		 */
		public DiscreteAlphabetParameterSet( String[] alphabet, boolean caseInsensitive ) throws Exception {
			this();
			StringBuilder alphString = new StringBuilder();
			for( int i = 0; i < alphabet.length; i++ ) {
				if( i > 0 ) {
					alphString.append( ' ' );
				}
				alphString.append( alphabet[i] );
			}
			setValues( alphString.toString(), caseInsensitive );
		}

		/**
		 * Creates a new {@link DiscreteAlphabetParameterSet} from an array of
		 * symbols. The symbols are joined by single blanks to form the value
		 * of the first parameter.
		 * 
		 * @param alphabet
		 *            the array of symbols
		 * @param caseInsensitive
		 *            <code>true</code> if the {@link DiscreteAlphabet} shall be
		 *            case insensitive
		 * @throws Exception
		 *             is thrown if the parameters could not be created
		 */
		public DiscreteAlphabetParameterSet( char[] alphabet, boolean caseInsensitive ) throws Exception {
			this();
			StringBuilder alphString = new StringBuilder();
			for( int i = 0; i < alphabet.length; i++ ) {
				if( i > 0 ) {
					alphString.append( ' ' );
				}
				alphString.append( alphabet[i] );
			}
			setValues( alphString.toString(), caseInsensitive );
		}

		/**
		 * Creates a new {@link DiscreteAlphabetParameterSet} from its XML
		 * representation.
		 * 
		 * @param representation
		 *            the XML representation
		 * @throws NonParsableException
		 *             is thrown if <code>representation</code> could not be
		 *             parsed
		 */
		public DiscreteAlphabetParameterSet( StringBuffer representation ) throws NonParsableException {
			super( representation );
		}

		/**
		 * Loads the parameters and sets the values of both of them.
		 * 
		 * @param alphString
		 *            the symbols of the alphabet separated by blanks
		 * @param caseInsensitive
		 *            <code>true</code> if the option &quot;Case
		 *            insensitive&quot; shall be selected, otherwise
		 *            &quot;Case sensitive&quot; is selected
		 * 
		 * @throws Exception
		 *             is thrown if the parameters could not be created or the
		 *             values could not be set
		 */
		private void setValues( String alphString, boolean caseInsensitive ) throws Exception {
			loadParameters();
			parameters.get( 0 ).setValue( alphString );
			// the second parameter is selected by the name of the option
			parameters.get( 1 ).setValue( caseInsensitive ? "Case insensitive" : "Case sensitive" );
		}

		/**
		 * Creates the two parameters of this set: the values of the alphabet
		 * as a {@link String} and the selection between a case insensitive
		 * and a case sensitive alphabet.
		 * 
		 * @throws Exception
		 *             is thrown if the parameters could not be created
		 * 
		 * @see de.jstacs.parameters.ParameterSet#loadParameters()
		 */
		@Override
		protected void loadParameters() throws Exception {
			initParameterList();
			// NOTE(review): the description below states that single-character symbols
			// may be given as one string (e.g. "ACGT"), but the constructor
			// DiscreteAlphabet(DiscreteAlphabetParameterSet) only splits the value at
			// blanks - confirm which behavior is intended
			parameters.add( new SimpleParameter( DataType.STRING,
					"Values of the alphabet",
					"The possible values of the discrete alphabet." + "If the alphabet consists of single characters, e.g. A, C, G, and T,"
							+ " the values may be set as a single string, e.g. &quot;ACGT&quot;."
							+ "If the alphabet consists of multi-character symbols, e.g. Gly, Asp, Ser,"
							+ "the symbols must be separated by spaces.",
					true ) );
			parameters.add( new CollectionParameter( DataType.BOOLEAN,
					new String[]{ "Case insensitive", "Case sensitive" },
					new Boolean[]{ true, false },
					"Case insensitive",
					"Use the alphabet case insensitive",
					true ) );
		}

		/**
		 * Returns a short description of the instance class.
		 * 
		 * @return the description
		 * 
		 * @see de.jstacs.parameters.InstanceParameterSet#getInstanceComment()
		 */
		@Override
		public String getInstanceComment() {
			return "An alphabet that consists of discrete values.";
		}
	}
}