import java.util.*;
import java.io.*;

/**
 * A program to perform T9-related analyses.
 * <p>
 * 
 * T9 is a dictionary-based disambiguating method for text entry using a telephone keypad. It is a
 * product of Tegic Communications, Inc. (http://www.tegic.com), a subsidiary of America Online
 * (AOL).
 * <p>
 * 
 * Invocation:
 * <p>
 * 
 * <pre>
*     PROMPT>java T9 file [-s] [-a] [-k] [-c]
*
*     where file = a word+freq+keystrokes dictionary file
*           -s = output summary data only
*           -a = output ambiguous word sets
*           -k = output word+freq+T9keystrokes data
*           -c = output collision analysis (word, freq, set_size, index_in_set)
*           (Note: default is no output)
* </pre>
 * 
 * Example invocations...
 * <p>
 * 
 * <pre>
*     PROMPT>java T9 d1-wordfreq-phoneks.txt -s
*     Number of words: 9022
*     Ambiguous words: 1064 (11.8%)
*     Ambiguous words requiring...
*        0 presses of NEXT: 476
*        1 presses of NEXT: 476
*        2 presses of NEXT: 83
*        3 presses of NEXT: 23
*        4 presses of NEXT: 5
*        5 presses of NEXT: 1
*     Words requiring at least one press of NEXT: 588 ( 6.5%)
*
*     PROMPT>java T9 d1-wordfreq-phoneks.txt -a
*     ... (truncated)
*     car bar cap 
*     case care base card bare cape 
*     based cared 
*     cases cards acres bases 
*     basin cargo 
*     cars bars bass 
*     ... (truncated)
*
*     PROMPT>java T9 d1-wordfreq-phoneks.txt -k
*     ... (truncated)
*     able 26890 2253S
*     cake 2256 2253NS
*     bald 569 2253NNS
*     calf 561 2253NNNS
*     calendar 1034 22536327S
*     baker 1716 22537S
*     cakes 828 22537NS
*     ... (truncated)
*
*     PROMPT>java T9 d2-wordfreq-phoneks.txt -s
*     Number of words: 64566
*     Ambiguous words: 8014 : (12.4%)
*     Ambiguous words requiring...
*        0 presses of NEXT: 3451
*        1 presses of NEXT: 3451
*        2 presses of NEXT: 721
*        3 presses of NEXT: 243
*        4 presses of NEXT: 86
*        5 presses of NEXT: 37
*        6 presses of NEXT: 13
*        7 presses of NEXT: 5
*        8 presses of NEXT: 3
*        9 presses of NEXT: 2
*        10 presses of NEXT: 1
*        11 presses of NEXT: 1
*     Words requiring at least press one of NEXT: 4563 : ( 7.1%)
*
*     PROMPT>java T9 fi-wordfreq-phoneks.txt -s              (Finnish)
*     Number of words: 200000
*     Ambiguous words: 15175 ( 7.6%)
*     Ambiguous words requiring...
*        0 presses of NEXT: 6626
*        1 presses of NEXT: 6626
*        2 presses of NEXT: 1311
*        3 presses of NEXT: 420
*        4 presses of NEXT: 120
*        5 presses of NEXT: 46
*        6 presses of NEXT: 17
*        7 presses of NEXT: 6
*        8 presses of NEXT: 1
*        9 presses of NEXT: 1
*        10 presses of NEXT: 1
*     Words requiring at least one press of NEXT: 8549 ( 4.3%)
* </pre>
 * 
 * @author Scott MacKenzie, 2001-2014
 */
public class T9
{
	/** Largest legal argument count: the dictionary file plus the four option flags. */
	private static final int MAX_ARGS = 5;

	/** Usage text printed when the command line is malformed. */
	private static final String USAGE = "usage: java T9 dictionary [-s] [-a] [-k] [-c]\n" + "\n"
			+ "   dictionary is a word+frequency+phonekeystrokes file\n" + "\n" + "   output options...\n"
			+ "   -s = summary output\n" + "   -a = ambiguous word sets\n" + "   -k = keystrokes\n"
			+ "   -c = collision analysis";

	/**
	 * Loads the coded dictionary named by <code>args[0]</code>, sorts it with <code>ByCode</code>,
	 * and prints whichever analyses the option flags select.
	 * <p>
	 * 
	 * For every dictionary entry the set of words sharing its key code is fetched with
	 * <code>EncodedWord.getUnique</code>. An entry whose set holds more than one word is counted as
	 * ambiguous, and its index in that set is taken as the number of NEXT presses it needs.
	 * 
	 * @param args
	 *            the dictionary file name followed by any of <code>-s</code>, <code>-a</code>,
	 *            <code>-k</code>, <code>-c</code>; arguments that match none of these flags are
	 *            ignored
	 * @throws IOException
	 *             declared for the dictionary load
	 * @throws IllegalStateException
	 *             if a word is missing from a multi-word set returned for its own code
	 */
	public static void main(String[] args) throws IOException
	{
		// the dictionary file is mandatory; up to four option flags may follow it
		if (args.length == 0 || args.length > MAX_ARGS)
		{
			System.out.println(USAGE);
			return;
		}

		boolean summaryOption = false;
		boolean ambiguousOption = false;
		boolean ksOption = false;
		boolean collisionOption = false;

		for (int i = 1; i < args.length; ++i)
		{
			if (args[i].equals("-s"))
				summaryOption = true;
			else if (args[i].equals("-a"))
				ambiguousOption = true;
			else if (args[i].equals("-k"))
				ksOption = true;
			else if (args[i].equals("-c"))
				collisionOption = true;
		}

		EncodedWord[] ew = EncodedWord.loadCodedDictionary(args[0]);
		Arrays.sort(ew, new ByCode());

		int totalWords = ew.length;
		int ambiguousWords = 0;
		int mostPressesOfNext = 0;

		// pressesOfNext[n] = number of ambiguous words found at index n of their set;
		// grown on demand by increment(), so no fixed upper limit on set size
		int[] pressesOfNext = new int[16];

		// last ambiguous set printed, used to suppress consecutive repeats under -a
		String aSetOld = "";

		for (int i = 0; i < ew.length; ++i)
		{
			String wrd = ew[i].getWord();
			String cde = ew[i].getCode();
			int frq = ew[i].getFreq();

			// fetched once per word and shared by all of the analyses below
			String[] matches = EncodedWord.getUnique(cde, ew);
			int position = indexOf(matches, wrd);

			if (matches.length > 1)
			{
				++ambiguousWords;

				if (ambiguousOption)
				{
					String aSet = joinWithTrailingSpaces(matches);
					if (!aSet.equals(aSetOld))
					{
						System.out.println(aSet);
						aSetOld = aSet;
					}
				}

				if (position < 0)
					throw new IllegalStateException("word '" + wrd
							+ "' is missing from the ambiguous set for its own code " + cde);

				pressesOfNext = increment(pressesOfNext, position);
				if (position > mostPressesOfNext)
					mostPressesOfNext = position;
			}

			if (ksOption)
			{
				// key code, one N (NEXT) per position to skip, then the terminating S (SPACE);
				// position is <= 0 for every word that is not in a multi-word set
				StringBuilder keystrokes = new StringBuilder(cde);
				for (int n = 0; n < position; ++n)
					keystrokes.append('N');
				keystrokes.append('S');
				System.out.println(wrd + " " + frq + " " + keystrokes);
			}

			if (collisionOption)
			{
				// word, freq, set_size, index_in_set (-1 if absent), then the set itself
				System.out.println(wrd + " " + frq + " " + matches.length + " " + position + " "
						+ joinWithTrailingSpaces(matches));
			}
		}

		if (summaryOption)
			printSummary(totalWords, ambiguousWords, pressesOfNext, mostPressesOfNext);
	}

	/** Returns the index of the first element equal to <code>target</code>, or -1 if none is. */
	private static int indexOf(String[] items, String target)
	{
		for (int j = 0; j < items.length; ++j)
		{
			if (target.equals(items[j]))
				return j;
		}
		return -1;
	}

	/** Concatenates the items in order, writing a single space after each one. */
	private static String joinWithTrailingSpaces(String[] items)
	{
		StringBuilder sb = new StringBuilder();
		for (int j = 0; j < items.length; ++j)
			sb.append(items[j]).append(' ');
		return sb.toString();
	}

	/** Adds one to <code>counts[index]</code>, enlarging the array first when the index is past its end. */
	private static int[] increment(int[] counts, int index)
	{
		if (index >= counts.length)
		{
			int[] grown = new int[Math.max(index + 1, counts.length * 2)];
			System.arraycopy(counts, 0, grown, 0, counts.length);
			counts = grown;
		}
		++counts[index];
		return counts;
	}

	/** Returns <code>part</code> as a percentage of <code>total</code>, or 0 when <code>total</code> is 0. */
	private static double percent(int part, int total)
	{
		return total == 0 ? 0.0 : (double)part / total * 100.0;
	}

	/** Prints the word counts followed by the number of words needing 0..mostPressesOfNext presses of NEXT. */
	private static void printSummary(int totalWords, int ambiguousWords, int[] pressesOfNext,
			int mostPressesOfNext)
	{
		int unambiguousWords = totalWords - ambiguousWords;

		StringBuilder s = new StringBuilder();
		s.append(String.format("%-20s %d\n", "Number of words:", totalWords));
		s.append(String.format("%-20s %d : %.1f%%\n", "Unambiguous words:", unambiguousWords,
				percent(unambiguousWords, totalWords)));
		s.append(String.format("%-20s %d : %.1f%%\n", "Ambiguous words:", ambiguousWords,
				percent(ambiguousWords, totalWords)));
		System.out.println(s);

		System.out.println("Word requiring n presses of NEXT...");
		for (int i = 0; i <= mostPressesOfNext; ++i)
		{
			// unambiguous words need no NEXT press, so they are reported in row 0
			int count = (i == 0) ? pressesOfNext[i] + unambiguousWords : pressesOfNext[i];
			System.out.println(i + " : " + count);
		}
	}
}
