Sunday, March 21, 2010

Binary Search Tree text file word search

Here's a program I made that will accept a text file (.txt, not Microsoft word or pdf stuff, just a basic text file) as a COMMAND LINE ARGUMENT and store each word (accepting ALL punctuation as if it were a space character, including apostrophe's) in a binary tree.  Each node in the tree stores an ArrayList which holds the number of times that word occurs(first element) and each position in the text file that it occurs (all following elements).  The user is shown the total number of words, and you can search for a word to see how many times and where in the file it is stored. Enjoy!

Just copy all these code snippets into separate text files with the same name as
the headings, compile them, and then run "java A3Driver textfile.txt", and you should see a GUI pop up.


A3Driver.java
 /**  
Assignment3
@author MasudKhan
This class is a driver for a program which reads in a file,
whose name is given as a command line argument, and stores each
word as well as how many occurences of that word and the positions of
each occurence in the text file. The program then offers to allow
the user to search for any input word and produce the info for it,
if any.
*/
import java.io.*;
import java.util.*;
public class A3Driver
{
/**
main method for Binary word search program.
*/
public static void main(String[] args)
{
WordBST newTestWordBST = new WordBST(new File(args[0]));
System.out.println("Total number of words in file: " +
WordBST.wordPosition);
System.out.println("Number of unique words in file: " +
WordBST.uniqueWordCount);
System.out.print("Most used word: " + WordBST.mostUsedWord);
System.out.println(", times used: " + WordBST.highestCount);
System.out.println("\n\n");
Scanner keyboard = new Scanner(System.in);
String input = "";
while(!input.equals("n"))
{
System.out.print("Search for word (y/n)? ");
input = keyboard.nextLine();
if(input.toLowerCase().equals("y"))
{
System.out.print("Enter word: ");
newTestWordBST.searchBinaryTree(keyboard.nextLine());
}
}
System.out.println("Thank you, goodbye.");
/**
To output all tree info, uncomment this section.
newTestWordBST.inOrderTraverse();
*/
}
}


WordBST.java
 /**  
Assignment3
@author MasudKhan
This class holds a Binary search tree of WordBSTNodes.
*/
import java.io.*;
import java.util.*;
public class WordBST
{
WordBSTNode root;
static int wordPosition, uniqueWordCount = 0, highestCount;
static String mostUsedWord;
/**
Constructor: initializes the root to null.
*/
public WordBST()
{
root = null;
}
/**
Constructor: initializes root to parameter value.
@param theRoot the root of this WordBST object
*/
public WordBST(WordBSTNode theRoot)
{
root = theRoot;
}
/**
Constructor: initializes root to hold nodes containing the words and
data from the parameter file.
@param text the input file containing words
*/
public WordBST(File text)
{
try
{
BufferedReader bR = new BufferedReader(new FileReader(text));
root = readBinaryTree(bR);
}
catch(IOException e)
{
System.out.println("Error reading file. Exiting.");
System.exit(1);
}
}
/**
Wrapper method for the recursive add method.
@param item the item to add to the BST
*/
public void add(String item)
{
root = add(root, item);
}
/**
Postcondition: uses recursion to add item to the BST node in the
appropriate position, or add information to the matching node.
@param localRoot the local node being checked in the current call
@param item the item to add to the BST
*/
private WordBSTNode add(WordBSTNode localRoot, String item)
{
if(localRoot == null) // item not in tree - add it
{
uniqueWordCount++; // total distinct word count
ArrayList<Integer> temp = new ArrayList<Integer>();
temp.add(1);
temp.add(wordPosition);
return new WordBSTNode(item, temp);
}
else if(item.compareTo(localRoot.word) == 0) // item == localRootData
{
localRoot.countAndPos.set(0, localRoot.countAndPos.get(0) + 1);
localRoot.countAndPos.add(wordPosition);
if(localRoot.countAndPos.get(0) > highestCount)
{
highestCount = localRoot.countAndPos.get(0);
mostUsedWord = localRoot.word;
}
return localRoot;
}
else if(item.compareTo(localRoot.word) < 0) //item < localRootData
{
localRoot.leftTree = add(localRoot.leftTree, item);
return localRoot;
}
else // item > localRootData
{
localRoot.rightTree = add(localRoot.rightTree, item);
return localRoot;
}
}
/**
Postcondition: performs an inorder traversal.
*/
public void inOrderTraverse()
{
inOrderTraverse(root);
}
/**
Perform an inorder traversal.
@param localRoot the current node being traversed
*/
private void inOrderTraverse(WordBSTNode localRoot)
{
if(localRoot.leftTree != null) // left
{
inOrderTraverse(localRoot.leftTree);
}
// middle
//output current root
System.out.print(localRoot.word);
for(int i = 0; i < localRoot.countAndPos.size(); i++)
{
System.out.print(", " + localRoot.countAndPos.get(i));
}
System.out.println();
if(localRoot.rightTree != null) // right
{
inOrderTraverse(localRoot.rightTree);
}
}
/**
Wrapper method for searchBinaryTree recursive method.
@param searchWord the String word to search for
*/
public void searchBinaryTree(String searchWord)
{
searchBinaryTree(searchWord, root);
}
/**
Postcondition: if word is found in the search, it is output along with
occurrence information, and if it is not found, not-found info is output.
@param searchWord the word to search for
@param localRoot the localRoot being checked in the current call
*/
public void searchBinaryTree(String searchWord, WordBSTNode localRoot)
{
if( (searchWord.compareTo(localRoot.word) < 0) &&
(localRoot.leftTree != null) )
{
searchBinaryTree(searchWord, localRoot.leftTree);
}
else if( (searchWord.compareTo(localRoot.word) > 0) &&
(localRoot.rightTree != null) )
{
searchBinaryTree(searchWord, localRoot.rightTree);
}
else if(searchWord.compareTo(localRoot.word) == 0)
{
System.out.println("Position number(s) of occurence(s):");
for(int i = 1; i < localRoot.countAndPos.size(); i++)
{
System.out.println("word #" + localRoot.countAndPos.get(i));
}
System.out.println("Word found.");
System.out.println("Occurences: " +
localRoot.countAndPos.get(0));
}
else
{
System.out.println("Word does not exist.");
}
}
/**
@param bR the bufferedReader to read from
@return returns a node containing the info from the file which the input
BufferedReader is reading from
*/
public WordBSTNode readBinaryTree(BufferedReader bR)
{
String data = "";
String temp;
try
{
while(bR.ready())
{
data = data.concat(bR.readLine().toLowerCase() + "\n");
}
if(data == "")
{
return null;
}
}
catch(IOException e)
{
System.out.println("Error reading file. Exiting.");
System.exit(1);
}
return readBinaryTree(
new StringTokenizer(data,
" \t\n\r\f.,!`'-\"\\:()[]{}=+_*&^%$#@?<>;|/~"));
}
/**
@param inputST the StringTokenizer to repeatedly add words from
@return returns a node containing the info from the file which the input
BufferedReader is reading from
*/
public WordBSTNode readBinaryTree(StringTokenizer inputST)
{
wordPosition = 1;
ArrayList<Integer> tempArrayList =
new ArrayList<Integer>(wordPosition++);
tempArrayList.add(1);
WordBST tempBST = new WordBST(
new WordBSTNode(inputST.nextToken(), tempArrayList));
highestCount = 1;
mostUsedWord = tempBST.root.word;
while(inputST.hasMoreTokens())
{
tempBST.add(inputST.nextToken());
wordPosition++; // current position, and total words in file
}
return tempBST.root;
}
}


WordBSTNode.java
 /**  
Assignment3
@author MasudKhan
This class holds a node for a binary search tree. The node includes
a pointer to it's left subtree and it's right subtree, as well as String
data, and an ArrayList containing the number of occurences of the
data in the user's input file in the first position, followed by the
word number in each following position.
*/
import java.util.*;
public class WordBSTNode
{
WordBSTNode leftTree;
WordBSTNode rightTree;
String word;
ArrayList<Integer> countAndPos;
/**
Constructor: no arg constructor must never be used to avoid
confusion between a null node element and a null element content.
*/
public WordBSTNode()
{
System.out.println("Cannot creat empty node, sorry.");
}
/**
Constructor: initializes parent, word, and countAndPos instance
variables.
@param theWord the word to be stored in this node
@param theCountAndPos an ArrayList containing this node's word's
count and position's
*/
public WordBSTNode(String theWord, ArrayList<Integer> theCountAndPos)
{
leftTree = null;
rightTree = null;
word = theWord;
countAndPos = theCountAndPos;
}
}


Feedback from dev's/student dev's is welcome!

1 comment:

  1. Hi,

    I am trying to execute the above code but i get the following error:

    Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 0
    at a3driver.A3Driver.main(A3Driver.java:20)
    Java Result: 1

    Please explain.

    Kevin

    ReplyDelete