import java.io.*;
import java.util.*;

/**
 * <h1>ScanningKeyboardSPC</h1>
 * 
 * <h3>Summary</h3>
 * 
 * <ul>
 * <li>
 * Compute the SPC (scan steps per character) for a scanning keyboard.
 * <p>
 * 
 * <li>Scanning keyboards are commonly used in accessible computing. Input uses a single key or
 * switch.
 * <p>
 * 
 * <li>Definition of SPC: the average number of scan steps required to generate a character of text
 * in a given language using a given scanning keyboard.
 * <p>
 * 
 * <li>SPC is the same as KSPC (keystrokes per character), except that the calculation uses scan
 * steps, rather than keystrokes. In both cases, each word is assumed to end with a terminating
 * SPACE character.
 * </ul>
 * 
 * <h3>Related references</h3>
 * 
 * <ul>
 * <li>MacKenzie, I. S. (2012). <a href="http://www.yorku.ca/mack/icchp2012.html">Modeling text
 * input for single-switch scanning</a>. <i>Proceedings of the 13th International Conference on
 * Computers Helping People with Special Needs - ICCHP 2012</i>, 423-430. Berlin: Springer.
 * <p>
 * 
 * <li>MacKenzie, I. S. (2002). <a href="http://www.yorku.ca/mack/hcimobile02.html">KSPC (keystrokes
 * per character) as a characteristic of text entry techniques</a>. <i>Proceedings of the Fourth
 * International Symposium on Human Computer Interaction with Mobile Devices</i>, pp. 195-210.
 * Heidelberg, Germany: Springer-Verlag.
 * <p>
 * 
 * <li>Koester, H. H., &amp; Levine, S. P. (1994). <a href=
 * "http://www.kpronline.com/files/KOESTER-LEVINE-1994-AT-Learning-and-performance-of-able-bodied-individuals-using-scanning-systems-with-and-without-word-prediction.pdf"
 * >Learning and performance of able-bodied individuals using scanning systems with and without word
 * prediction</a>. <i>Assistive Technology</i>, 6, 42-53.
 * </ul>
 * 
 * <h3>Details</h3>
 * 
 * The following usage message appears if the program is launched without arguments:
 * <p>
 * 
 * <pre>
*     Usage: java ScanningKeyboardSPC file -ak|-kk|-ck... [-wfs] [-kl] [-spc] [-wp] [-hh]
*     
*     where file = word-frequency dictionary file
*           -ak = alpha keyboard layout (_abcde-fghijk-lmnopq-rstuvw-xyz...)
*           -kk = Koester-Levine keyboard layout (_eardu-tnsfwb-ohcpvj-imykq.-lgxz..)
*           -ck... = custom keyboard layout (e.g., -ck_abcd-efghi-klmno-pqrst-wxyz.)
*           -wfs = output word-freq-scans
*           -kl = output keyboard layout
*           -spc = output Scan Steps Per Characters statistic
*           -wp = word predict (use "W" (uppercase) in layout to indicate column)
*           -hh = assume half-and-half scanning for word prediction
* </pre>
 * <p>
 * 
 * The first argument is a word-frequency dictionary. This is a reduced version of the language
 * corpus used for the analysis. For the analyses in the first reference above, three such
 * word-frequency files were used: <a href="d1-wordfreq.txt">d1-wordfreq.txt</a>, <a
 * href="d2-wordfreq.txt">d2-wordfreq.txt</a>, <a href="bc-wordfreq.txt">bc-wordfreq.txt</a>, and <a
 * href="phrases2-wordfreq.txt">phrases2-wordfreq.txt</a>. Consult the first reference above for
 * details on the original corpora.
 * <p>
 * 
 * Optional arguments appear next.
 * <p>
 * 
 * The <code>-ak</code> and <code>-kk</code> options are used for two layouts that are hard-coded in
 * the application. These are an alphabetic layout (<code>-ak</code> option) and an optimized layout
 * proposed in the Koester and Levine reference (<code>-kk</code> option). See below.
 * <p>
 * 
 * <pre>
*      ALPHABETIC      KOESTER-LEVINE
*      ===========     ===========
*      _ a b c d e     _ e a r d u
*      f g h i j k     t n s f w b 
*      l m n o p q     o h c p v j
*      r s t u v w     i m y k q . 
*      x y z . . .     l g x z . .
* </pre>
 * <p>
 * 
 * An underscore ("<code>_</code>") represents the SPACE character.  Periods (".") are unassigned
 * keys.
 * <p>
 * 
 * The <code>-ck</code> option is used to specify a custom layout. A dash is used as a row
 * separator. There must be an equal number of characters per row. Use lowercase letters. See
 * example invocations below.
 * <p>
 * 
 * There are three output options:
 * <p>
 * 
 * <ul>
 * <li><code>-wfs</code> &ndash; output the word-frequency list; each line is appended with the
 * coded scan steps for the word
 * <p>
 * 
 * <li><code>-kl</code> &ndash; output the keyboard layout
 * <p>
 * 
 * <li><code>-spc</code> &ndash; output the computed scan steps per character (SPC)
 * </ul>
 * <p>
 * 
 * The <code>-wp</code> option is for word prediction. This option is used for keyboards that
 * include a dedicated column for words. The word entries are identified by W (uppercase) in the
 * column where the words appear. Words are predicted from the current word stem and are assumed to
 * populate the word list after each character entered. Words are sorted by their probability in the
 * word-frequency file. The computation of SPC will consider the contents of this list for optimal
 * input (i.e., lowest SPC).
 * <p>
 * 
 * The <code>-hh</code> option is for "half-and-half scanning", as proposed by Koester and Levine.
 * This option only applies if the <code>-wp</code> option is also used. Half-and-half scanning is
 * used to speed up entry with word prediction. Scanning initially alternates between the
 * letter-region and the word-region of the keyboard. The user first selects in the desired region,
 * then selects within the letter or word region of the keyboard. Within the letter region,
 * conventional row-column scanning is used.
 * <p>
 * 
 * <h3>Example invocations</h3>
 * 
 * <pre>
*     PROMPT>java ScanningKeyboardSPC d1-wordfreq.txt -ak -spc -kl
*     Keyboard layout...
*        _ a b c d e 
*        f g h i j k 
*        l m n o p q 
*        r s t u v w 
*        x y z . . .      
*     WordFreqFile=d1-wordfreq.txt, SPC= 5.19
*
*     PROMPT>java ScanningKeyboardSPC d1-wordfreq.txt -kk -spc -kl
*     Keyboard layout...
*        _ e a r d u 
*        t n s f w b 
*        o h c p v j 
*        i m y k q . 
*        l g x z . .    
*     WordFreqFile=d1-wordfreq.txt, SPC= 4.28
*
*     PROMPT>java ScanningKeyboardSPC d1-wordfreq.txt -kk -wfs
*     the 5776384 .Rt..R.hR.eR_
*     of 2789403 ..Ro.R...fR_
*     and 2421302 R..a.R.nR....dR_
*     a 1939617 R..aR_
*     in 1695860 ...Ri.R.nR_
*     to 1468146 .Rt..RoR_
*     is 892937 ...Ri.R..sR_
*     ...
*
*     PROMPT>java ScanningKeyboardSPC d1-wordfreq.txt -ck_abcd-efghi-jklmn-opqrs-tuvwx-yz... -kl -spc
*     Keyboard layout...
*        _ a b c d 
*        e f g h i 
*        j k l m n 
*        o p q r s 
*        t u v w x 
*        y z . . . 
*     WordFreqFile=d1-wordfreq.txt, SPC= 5.14
*     
*     PROMPT>java ScanningKeyboardSPC d1-wordfreq.txt -ck_earduW-tnsfwbW-ohcpvjW-imykq.W-lgxz..W-......W -kl -spc -wp 
*     Keyboard layout...
*        _ e a r d u W 
*        t n s f w b W 
*        o h c p v j W 
*        i m y k q . W 
*        l g x z . . W 
*        . . . . . . W 
*     WordFreqFile=d1-wordfreq.txt, SPC= 3.35
*     
*     PROMPT>java ScanningKeyboardSPC d1-wordfreq.txt -ck_earduW-tnsfwbW-ohcpvjW-imykq.W-lgxz..W-......W -kl -spc -wp -hh
*     Keyboard layout...
*        _ e a r d u W 
*        t n s f w b W 
*        o h c p v j W 
*        i m y k q . W 
*        l g x z . . W 
*        . . . . . . W 
*     WordFreqFile=d1-wordfreq.txt, SPC= 2.73
*     
* </pre>
 * 
 * The last two invocations are examples with word prediction. The word prediction list is
 * included as an extra column in the layout. Uppercase <font size=+1><code>W</code></font>
 * indicates the column. As proposed by Koester and Levine, there are six words in the list, so an
 * extra row is added. Note the reduced SPC with half-and-half scanning, from SPC = 3.35 to SPC =
 * 2.73.
 * <p>
 * 
 * Obviously, the command-line arguments are long and complicated when doing analyses on keyboards with custom layouts.  
 * One approach is to organize the analyses in a batch file.  As an example, the batch file 
 * <a href="ScanningKeyboardSPC.bat">ScanningKeyboardSPC.bat</a> was used to generate the analyses used
 * in the MacKenzie <i>ICCHP 2012</i> paper.
 * <p>
 * 
 * A separate application called <a href="WordPredict.html"><code>WordPredict</code></a> is available to view the word prediction lists,
 * if desired.
 * <p>
 * 
 * @author Scott MacKenzie, 2009-2015
 */
public class ScanningKeyboardSPC
{
	// Word-prediction geometry of a custom (-ck) layout. These are static so that getScans() can
	// read them. main() resets all three to -1 and the -ck parser fills them in.
	static int wpColumn; // column of the first 'W' cell found (row-major order), or -1
	static int wpFirstRow; // row of the first 'W' cell found, or -1
	static int wpListSize; // number of 'W' cells in the custom layout; -1 if no custom layout

	// Optimized layout, as per Koester and Levine (1994). '_' is SPACE, '.' is an unassigned key.
	private static final char[][] KOESTER_KEYBOARD = { { '_', 'e', 'a', 'r', 'd', 'u' },
			{ 't', 'n', 's', 'f', 'w', 'b' }, { 'o', 'h', 'c', 'p', 'v', 'j' },
			{ 'i', 'm', 'y', 'k', 'q', '.' }, { 'l', 'g', 'x', 'z', '.', '.' } };

	// Alphabetic layout. '_' is SPACE, '.' is an unassigned key.
	private static final char[][] ALPHA_KEYBOARD = { { '_', 'a', 'b', 'c', 'd', 'e' },
			{ 'f', 'g', 'h', 'i', 'j', 'k' }, { 'l', 'm', 'n', 'o', 'p', 'q' },
			{ 'r', 's', 't', 'u', 'v', 'w' }, { 'x', 'y', 'z', '.', '.', '.' } };

	private static final String USAGE = "Usage: java ScanningKeyboardSPC file -ak|-kk|-ck... [-wfs] [-kl] [-spc] [-wp] [-hh]\n\n"
			+ "where file = word-frequency dictionary file\n"
			+ "      -ak = alpha keyboard layout (_abcde-fghijk-lmnopq-rstuvw-xyz...)\n"
			+ "      -kk = Koester-Levine keyboard layout (_eardu-tnsfwb-ohcpvj-imykq.-lgxz..)\n"
			+ "      -ck... = custom keyboard layout (e.g., -ck_abcd-efghi-klmno-pqrst-wxyz.)\n"
			+ "      -wfs = output word-freq-scans\n"
			+ "      -kl = output keyboard layout\n"
			+ "      -spc = output Scan Steps Per Characters statistic\n"
			+ "      -wp = word predict (use \"W\" (uppercase) in layout to indicate column)\n"
			+ "      -hh = assume half-and-half scanning for word prediction";

	/**
	 * Program entry point. Parses the command line, builds the keyboard, loads the word-frequency
	 * dictionary and prints whichever outputs were requested.
	 * 
	 * @param args <code>args[0]</code> is the word-frequency file, <code>args[1]</code> is the
	 *        layout option (<code>-ak</code>, <code>-kk</code> or <code>-ck...</code>), and any
	 *        further arguments are output/word-prediction options
	 * @throws IOException declared for the dictionary-loading calls
	 */
	public static void main(String[] args) throws IOException
	{
		// Both the dictionary file and a layout option are required. (Previously a lone file
		// argument fell through to args[1] and crashed.)
		if (args.length < 2)
		{
			System.out.println(USAGE);
			System.exit(0);
		}

		wpColumn = -1;
		wpFirstRow = -1;
		wpListSize = -1;

		boolean wfsOption = false;
		boolean klOption = false;
		boolean spcOption = false;
		boolean wpOption = false;
		boolean hhOption = false;

		// get other options (must be at least 1 or no output)
		for (int i = 2; i < args.length; ++i)
		{
			String option = args[i];
			if (option.equals("-wfs"))
				wfsOption = true;
			else if (option.equals("-kl"))
				klOption = true;
			else if (option.equals("-spc"))
				spcOption = true;
			else if (option.startsWith("-wp")) // startsWith is safe for options shorter than 3 chars
				wpOption = true;
			else if (option.equals("-hh"))
				hhOption = true;
		}

		// the keyboard k is a character matrix
		char[][] k;
		String layoutOption = args[1];
		if (layoutOption.equals("-ak"))
			k = ALPHA_KEYBOARD;
		else if (layoutOption.equals("-kk"))
			k = KOESTER_KEYBOARD;
		else if (layoutOption.startsWith("-ck"))
			k = parseCustomKeyboard(layoutOption.substring(3));
		else
		{
			// Without this guard the keyboard stayed null and the program failed later with a
			// NullPointerException.
			System.out.println("Error: unknown keyboard layout option (" + layoutOption + ")!");
			System.exit(0);
			return;
		}

		// Word prediction needs a 'W' column. Only a custom layout can provide one, so this
		// also rejects -wp combined with -ak or -kk.
		if (wpOption && wpColumn == -1)
		{
			System.out.println("Can't determine WP column!");
			System.exit(0);
		}

		WordPredict wp = wpOption ? new WordPredict(args[0]) : null;

		// print keyboard layout
		if (klOption)
		{
			System.out.println("Keyboard layout...");
			for (char[] row : k)
			{
				System.out.print("   ");
				for (char key : row)
					System.out.print(key + " ");
				System.out.println();
			}
		}

		Word[] words = Word.loadDictionary(args[0]);

		long scanSteps = 0;
		long characters = 0;
		for (Word entry : words)
		{
			String word = entry.getWord();
			String wordWithSpace = word + "_"; // append SPACE
			long freq = entry.getFreq();
			String scans = getScans(k, wordWithSpace, wp, hhOption);

			if (wfsOption)
				System.out.printf("%s %d %s\n", word, freq, scans);

			// frequency-weighted totals
			scanSteps += scans.length() * freq;
			characters += wordWithSpace.length() * freq;
		}

		double spc = (double)scanSteps / characters;
		if (spcOption)
			System.out.printf("WordFreqFile=%s, SPC= %.2f\n", args[0], spc);
	}

	/**
	 * Builds the keyboard matrix from the text following <code>-ck</code> and records the
	 * word-prediction geometry (<code>wpColumn</code>, <code>wpFirstRow</code>,
	 * <code>wpListSize</code>) from any uppercase 'W' cells. Prints an error and exits if the
	 * definition is empty or the rows are not all the same non-zero length.
	 */
	private static char[][] parseCustomKeyboard(String keyboardDef)
	{
		if (keyboardDef.length() == 0)
		{
			System.out.println("Error: No keyboard layout given after -ck!");
			System.exit(0);
		}

		// A limit of -1 keeps empty rows (leading, trailing or doubled dashes) so that they are
		// reported as a size mismatch rather than silently dropped.
		String[] rowDefs = keyboardDef.split("-", -1);
		int columns = rowDefs[0].length();
		char[][] keyboard = new char[rowDefs.length][];
		wpListSize = 0;

		for (int i = 0; i < rowDefs.length; ++i)
		{
			if (rowDefs[i].length() == 0 || rowDefs[i].length() != columns)
			{
				System.out.println("Error: Rows must be same size!");
				System.exit(0);
			}
			keyboard[i] = rowDefs[i].toCharArray();

			for (int j = 0; j < keyboard[i].length; ++j)
			{
				if (keyboard[i][j] == 'W')
				{
					++wpListSize;
					if (wpFirstRow == -1)
						wpFirstRow = i;
					if (wpColumn == -1)
						wpColumn = j;
				}
			}
		}
		return keyboard;
	}

	/**
	 * Returns the row of the first key (row-major order) that matches the given character,
	 * ignoring case.
	 * 
	 * @param keyboardArg the keyboard matrix
	 * @param cArg the character to look for
	 * @return the zero-based row index, or -1 if no key matches
	 */
	public static int getRowIndex(char[][] keyboardArg, char cArg)
	{
		int[] position = findKey(keyboardArg, cArg, false);
		return position == null ? -1 : position[0];
	}

	/**
	 * Returns the column of the first key (row-major order) that matches the given character,
	 * ignoring case.
	 * 
	 * @param keyboardArg the keyboard matrix
	 * @param cArg the character to look for
	 * @return the zero-based column index, or -1 if no key matches
	 */
	public static int getColumnIndex(char[][] keyboardArg, char cArg)
	{
		int[] position = findKey(keyboardArg, cArg, false);
		return position == null ? -1 : position[1];
	}

	/**
	 * Finds the first key (row-major order) matching <code>c</code>, ignoring case, and returns
	 * <code>{row, column}</code>, or <code>null</code> if there is none.
	 * <p>
	 * 
	 * When <code>skipWordCells</code> is true, uppercase 'W' cells in <code>wpColumn</code> are
	 * not treated as keys. They mark word-list slots, and because matching is case-insensitive
	 * they would otherwise capture the letter 'w' ahead of its real key.
	 */
	private static int[] findKey(char[][] keyboard, char c, boolean skipWordCells)
	{
		char target = Character.toLowerCase(c);
		for (int i = 0; i < keyboard.length; ++i)
		{
			// use the row's own length so a ragged matrix cannot index out of bounds
			for (int j = 0; j < keyboard[i].length; ++j)
			{
				char cell = keyboard[i][j];
				if (skipWordCells && j == wpColumn && cell == 'W')
					continue;
				if (target == Character.toLowerCase(cell))
					return new int[] { i, j };
			}
		}
		return null;
	}

	/** Appends <code>count</code> passive scan steps ('.'); appends nothing if count is not positive. */
	private static void appendPassive(StringBuilder scans, int count)
	{
		for (int n = 0; n < count; ++n)
			scans.append('.');
	}

	/**
	 * Returns the coded scan steps needed to enter a word. One character of the result is one
	 * scan step: '.' is a passive scan step, 'R' is a row select, 'H' is the region step added
	 * under half-and-half scanning, 'W' is a word select, and any other character is the
	 * selection of that key.
	 * <p>
	 * 
	 * With word prediction, the candidate list for the current stem is consulted before each
	 * character (including before the first, with an empty stem). As soon as the word appears in
	 * the list it is selected from the list and the remaining characters are not scanned.
	 * <p>
	 * 
	 * NOTE: with word prediction active, the 'W' marker cells are no longer matched when looking
	 * up the letter 'w'. Earlier versions resolved 'w' to the first 'W' marker, so SPC figures
	 * for word-prediction layouts produced by them may differ slightly.
	 * <p>
	 * 
	 * If a character has no key on the keyboard, an error is printed and the program exits.
	 * 
	 * @param keyboardArg the keyboard matrix
	 * @param wordArg the word to enter, including its terminating '_' (SPACE)
	 * @param wpArg the word predictor, or <code>null</code> for no word prediction
	 * @param hhOptionArg true to assume half-and-half scanning
	 * @return the coded scan steps for the word
	 */
	public static String getScans(char[][] keyboardArg, String wordArg, WordPredict wpArg, boolean hhOptionArg)
	{
		boolean wordPredict = wpArg != null;
		StringBuilder scans = new StringBuilder();
		StringBuilder stem = new StringBuilder();

		for (int i = 0; i < wordArg.length(); ++i)
		{
			// Includes "discount word prediction" -- top n words presented right away
			if (wordPredict)
			{
				String[] wordList = wpArg.getWords(stem.toString(), wpListSize);

				// Is the word in the list? If so, listIndex is its row in the word column.
				int listIndex = -1;
				if (wordList != null)
				{
					for (int m = 0; m < wordList.length; ++m)
					{
						if ((wordList[m] + "_").equals(wordArg))
						{
							listIndex = m;
							break;
						}
					}
				}

				if (listIndex >= 0)
				{
					if (hhOptionArg)
					{
						scans.append(".H");
						appendPassive(scans, listIndex);
						scans.append('W');
					}
					else
					{
						appendPassive(scans, listIndex); // passive scan steps
						scans.append('R'); // row select
						appendPassive(scans, wpColumn); // passive scan steps
						scans.append('W'); // word select
					}
					break; // done!
				}
			}

			char c = wordArg.charAt(i);
			stem.append(c);

			// one lookup gives both row and column
			int[] position = findKey(keyboardArg, c, wordPredict);
			if (position == null)
			{
				System.out.printf("Error: character (%c) not found!", c);
				System.exit(0);
			}
			if (hhOptionArg)
				scans.append('H');
			appendPassive(scans, position[0]); // passive scan steps
			scans.append('R'); // row select
			appendPassive(scans, position[1]); // passive scan steps
			scans.append(c); // character select
		}
		return scans.toString();
	}
}
