import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
 * This program calculates the KSPC characteristic for various text entry techniques based on the
 * keystrokes required to enter words in the target language.
 * <p>
 * 
 * KSPC is an acronym for "keystrokes per character". It is the number of keystrokes required, on
 * average, to generate a character of text for a given text entry method in a given language.
 * <p>
 * 
 * Related references:
 * <p>
 * 
 * <ul>
 * <li><a href="http://www.yorku.ca/mack/hcimobile02.html">KSPC (Keystrokes per Character) as a
 * characteristic of text entry techniques</a>, by MacKenzie (<i>MobileHCI 2002</i>). This is the
 * paper that first introduced KSPC as a characteristic statistic for text entry methods.
 * <p>
 * 
 * <li><a href="http://www.yorku.ca/mack/chapter5.html">Text entry using a small number of
 * buttons</a>, by MacKenzie and Tanaka-Ishii (2007). This paper presents an analysis of KSPC and a
 * comparison of several text entry techniques using KSPC.
 * </ul>
 * 
 * Invocation:
 * <p>
 * 
 * <pre>
*     PROMPT>java KSPCWords file
*
*     where file = a word+freq+keystroke file
* </pre>
 * 
 * 
 * The argument <code>file</code> is a text file containing a series of lines, each with three
 * white-space delimited entries: a word, the word frequency, and the keystrokes to enter the word.
 * The keystroke entry should include a terminating SPACE character.
 * <p>
 * 
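 * The output is the KSPC characteristic for the text entry technique. The program prints the value
 * rounded to four decimal places; the example outputs below are shown with more digits.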
 * <p>
 * 
 * Some example invocations are shown below. The file used in the 1st invocation is <a
 * href="d1-wordfreq-mtks.txt">d1-wordfreq-mtks.txt</a>. The third entry on each line is the
 * keystroke pattern needed to enter the word using multitap on a phone keypad (including a
 * terminating SPACE). This file was built from <a href="d1-wordfreq.txt">d1-wordfreq.txt</a> using
 * the <code>BuildKeystrokes</code> utility. Consult the API for <code>BuildKeystrokes</code> for
 * complete details. The other files in the invocation examples can be built using
 * <code>BuildKeystrokes</code>.
 * <p>
 * 
 * Invocations - Dictionary #1: (<a href="d1-wordfreq.txt">d1-wordfreq.txt</a> &ndash; 9022 words)
 * <p>
 * 
 * <pre>
*     PROMPT>java KSPCWords d1-wordfreq-mtks.txt
*     KSPC = 2.0242237284857083
*   
*     PROMPT>java KSPCWords d1-wordfreq-m2tks.txt
*     KSPC = 4.960524263792793
*
*     PROMPT>java KSPCWords d1-wordfreq-mt4ks.txt
*     KSPC = 3.4667227248852397
*
*     PROMPT>java KSPCWords d1-wordfreq-mt6ks.txt
*     KSPC = 3.139541982622594
*
*     PROMPT>java KSPCWords d1-wordfreq-t9ks.txt  
*     KSPC = 1.0064113710167126
*
*     PROMPT>java KSPCWords d1-wordfreq-t6ks.txt  
*     KSPC = 1.0288350850177785
*
*     PROMPT>java KSPCWords d1-wordfreq-t4ks.txt  
*     KSPC = 1.0669724260825588
*
*     PROMPT>java KSPCWords d1-wordfreq-t2ks.txt  
*     KSPC = 1.547122102995653
*     
*     PROMPT>java KSPCWords d1-wordfreq-lwks.txt          
*     KSPC = 1.1467001895323452
*     
*     PROMPT>java KSPCWords d1-wordfreq-meks.txt           
*     KSPC = 1.8157369585513656
*     
*     PROMPT>java KSPCWords d1-wordfreq-pagerks.txt           
*     KSPC = 3.1247507835761943
*     
*     PROMPT>java KSPCWords d1-wordfreq-datestamp1ks.txt   
*     KSPC = 10.711223210678188
*     
*     PROMPT>java KSPCWords d1-wordfreq-datestamp2ks.txt   
*     KSPC = 10.55073639862169
*     
*     PROMPT>java KSPCWords d1-wordfreq-datestamp6ks.txt   
*     KSPC = 4.103238636279833
*     
*     java KSPCWords d1-wordfreq-wpk10ks.txt 
*     KSPC = 0.5050941156663911
*     
*     java KSPCWords d1-wordfreq-wpk5ks.txt 
*     KSPC = 0.7939110125879739
*     
*     java KSPCWords d1-wordfreq-wpk2ks.txt   
*     KSPC = 0.7292869590025196
*     
*     java KSPCWords d1-wordfreq-wpk1ks.txt  
*     KSPC = 0.6867196447839975
*     
*     java KSPCWords d1-wordfreq-wps1ks.txt   
*     KSPC = 0.7175690315314045
*     
*     java KSPCWords d1-wordfreq-wps2ks.txt   
*     KSPC = 0.6272164642142578
*     
*     java KSPCWords d1-wordfreq-wps5ks.txt   
*     KSPC = 0.5366359936980745
*     
*     java KSPCWords d1-wordfreq-wps10ks.txt  
*     KSPC = 0.48955932330736396
*     
*     java KSPCWords d1-wordfreq-ttnsks.txt  
*     KSPC = 1.6412600112779792
* </pre>
 * 
 * Invocations - Dictionary #2: (<a href="d2-wordfreq.txt">d2-wordfreq.txt</a> &ndash; 64,566 words)
 * <p>
 * 
 * <pre>
*     PROMPT>java KSPCWords d2-wordfreq-mtks.txt          (Multitap)
*     KSPC = 2.0341507188198014
*
*     PROMPT>java KSPCWords d2-wordfreq-t9ks.txt          (T9)
*     KSPC = 1.0072058361585188
*
*     PROMPT>java KSPCWords d2-wordfreq-lwks.txt          (LetterWise)
*     KSPC = 1.1500484081836353
*
*     PROMPT>java KSPCWords d2-wordfreq-meks.txt          (MessageEase)
*     KSPC = 1.8209716971531273
*
*     PROMPT>java KSPCWords d2-wordfreq-pagerks.txt       (Glenayre pager)
*     KSPC = 3.132031659103719
*
*     PROMPT>java KSPCWords d2-wordfreq-datestamp1ks.txt  (date stamp method #1)
*     KSPC = 10.65975600545338
*
*     PROMPT>java KSPCWords d2-wordfreq-datestamp2ks.txt  (date stamp method #2)
*     KSPC = 10.619927472302642
*
*     PROMPT>java KSPCWords d2-wordfreq-datestamp6ks.txt  (date stamp method #6)
*     KSPC = 4.226292050082005
*
*     PROMPT>java KSPCWords d2-wordfreq-wpk10ks.txt  
*     KSPC = 0.8131640271181506
*
*     PROMPT>java KSPCWords d2-wordfreq-wpk5ks.txt   
*     KSPC = 0.748312907208014
*
*     PROMPT>java KSPCWords d2-wordfreq-wpk2ks.txt   
*     KSPC = 0.7085798740624372
*
*     PROMPT>java KSPCWords d2-wordfreq-wpk1ks.txt   
*     KSPC = 0.7390759336078825
*
*     PROMPT>java KSPCWords d2-wordfreq-wps1ks.txt   
*     KSPC = 0.7390759336078825
*
*     PROMPT>java KSPCWords d2-wordfreq-wps2ks.txt 
*     KSPC = 0.6465846451367377
*
*     PROMPT>java KSPCWords d2-wordfreq-wps5ks.txt  
*     KSPC = 0.5506287228310269
*
*     PROMPT>java KSPCWords d2-wordfreq-wps10ks.txt 
*     KSPC = 0.5000315114434339
* </pre>
 * 
 * Invocations - Brown corpus: (<a href="bc-wordfreq.txt">bc-wordfreq.txt</a> &ndash; 41,532 words)
 * <p>
 * 
 * <pre>
*     PROMPT>java KSPCWords bc-wordfreq-mtks.txt
*     KSPC = 2.0442649462511326
*     
*     PROMPT>java KSPCWords bc-wordfreq-t9ks.txt  
*     KSPC = 1.008214539459543
*     
*     PROMPT>java KSPCWords bc-wordfreq-lwks.txt          
*     KSPC = 1.1623250386744075
*     
*     PROMPT>java KSPCWords bc-wordfreq-meks.txt           
*     KSPC = 1.8239574145624717
*     
*     PROMPT>java KSPCWords bc-wordfreq-pagerks.txt           
*     KSPC = 3.140325186207169
*     
*     PROMPT>java KSPCWords bc-wordfreq-datestamp1ks.txt   
*     KSPC = 10.564776588456656
*     
*     PROMPT>java KSPCWords bc-wordfreq-datestamp2ks.txt   
*     KSPC = 10.603084634745237
*
*     PROMPT>java KSPCWords bc-wordfreq-datestamp6ks.txt   
*     KSPC = 4.314453728184652
*
*     PROMPT>java KSPCWords bc-wordfreq-wpk10ks.txt  
*     KSPC = 0.8124519768507685
*
*     PROMPT>java KSPCWords bc-wordfreq-wpk5ks.txt   
*     KSPC = 0.7488902395077912
*
*     PROMPT>java KSPCWords bc-wordfreq-wpk2ks.txt    
*     KSPC = 0.7082677289025504
*
*     PROMPT>java KSPCWords bc-wordfreq-wpk1ks.txt  
*     KSPC = 0.7372019741154122
*
*     PROMPT>java KSPCWords bc-wordfreq-wps1ks.txt  
*     KSPC = 0.7372019741154122
*
*     PROMPT>java KSPCWords bc-wordfreq-wps2ks.txt   
*     KSPC = 0.7372019741154122
*
*     PROMPT>java KSPCWords bc-wordfreq-wps5ks.txt  
*     KSPC = 0.6479416532634211
*
*     PROMPT>java KSPCWords bc-wordfreq-wps10ks.txt  
*     KSPC = 0.5551980768504509
* </pre>
 * 
 * Invocations - Finnish corpus: (<a href="fi-wordfreq.txt">fi-wordfreq.txt</a> &ndash; 200,000
 * words)
 * <p>
 * 
 * <pre>
*     PROMPT>java KSPCWords fi-wordfreq-mtks.txt
*     KSPC = 2.2371840963420784
*     
*     PROMPT>java KSPCWords fi-wordfreq-t9ks.txt
*     KSPC = 1.0043453727052178
*
*     PROMPT>java KSPCWords fi-wordfreq-wpk1ks.txt
*     KSPC = 0.7637596123000695
* </pre>
 * 
 * @author Scott MacKenzie, 2001-2015
 */
public class KSPCWords
{
	/**
	 * Reads a word+freq+keystroke file and prints the frequency-weighted KSPC value.
	 * <p>
	 * 
	 * Each input line must contain exactly three white-space delimited tokens: a word, an integer
	 * frequency, and the keystroke string for that word. For every line the keystroke count
	 * (length of the keystroke token) and the character count (word length plus one for the
	 * terminating SPACE) are each multiplied by the frequency and accumulated. KSPC is the ratio of
	 * the two totals.
	 * <p>
	 * 
	 * Diagnostics are written to standard output. A missing file or a malformed line terminates the
	 * program with exit status 1. A wrong argument count prints a usage message and returns.
	 * 
	 * @param args a single element: the path of the word+freq+keystroke file
	 * @throws IOException if the file cannot be opened or read
	 */
	public static void main(String[] args) throws IOException
	{
		if (args.length != 1)
		{
			System.out.printf("usage: KSPCWords file\n");
			System.out.printf("\nwhere file = a word-freq-ks file\n");
			return;
		}

		// convert command-line arguments to variables
		String inFile = args[0];

		// make sure file exists
		File f = new File(inFile);
		if (!f.exists())
		{
			System.out.printf("File not found: %s\n", inFile);
			System.exit(1);
		}

		long totalKeystrokes = 0;
		long totalChars = 0;

		// try-with-resources guarantees the reader is closed, even if reading fails
		try (BufferedReader inputFile = new BufferedReader(new FileReader(inFile)))
		{
			// process lines until no more input
			String line;
			while ((line = inputFile.readLine()) != null)
			{
				StringTokenizer st = new StringTokenizer(line);
				if (st.countTokens() != 3)
				{
					System.out.printf("data format error\n");
					System.exit(1);
				}

				String word = st.nextToken();
				long freq = parseFrequency(st.nextToken());
				String keystrokes = st.nextToken();

				totalKeystrokes += keystrokes.length() * freq;

				// +1 accounts for the SPACE that terminates each word
				totalChars += (word.length() + 1) * freq;
			}
		}

		// compute KSPC (NaN if the character total is zero, e.g., for an empty file)
		double kspc = (double)totalKeystrokes / totalChars;

		// output result
		System.out.printf("KSPC = %.4f\n", kspc);
	}

	/**
	 * Parses a frequency token as a <code>long</code>, so that counts larger than
	 * <code>Integer.MAX_VALUE</code> are accepted. A token that is not a valid integer is reported
	 * as a data format error and terminates the program with exit status 1.
	 * 
	 * @param token the frequency field of an input line
	 * @return the parsed frequency
	 */
	private static long parseFrequency(String token)
	{
		try
		{
			return Long.parseLong(token);
		} catch (NumberFormatException e)
		{
			System.out.printf("data format error\n");
			System.exit(1);
			return 0; // not reached; satisfies the compiler
		}
	}
}
