
Originally Posted by
concerto49
Assuming the file looks like:
<key1> <value1> <key2> <value2>... and, IF the search string is a substring of a key, you return each of these results?
How are you hashing it right now? Are you hashing every substring possible?
The code I have written to read the file is shown below:
Please suggest changes, or any better approach, so that we can bring the reading and search time down to less than 5 milliseconds.
1. File CharByCharSearch.java import java.io.BufferedReader; import java.util.ArrayList; import java.util.HashMap; import java.util.Set; public class CharByCharSearch { private static HashMap<String, String> mapForKeyValues = new HashMap<String, String>(); private static CharByCharSearch getHtml = null; private static ThreadLocal localPool = new ThreadLocal(); private static BufferedReader dataInputStream = null; static { getHtml = new CharByCharSearch(); dataInputStream = FileReader.getFileContentsBR(); getHtml.grabHTMLLinksSearch(); localPool.set(mapForKeyValues); } public CharByCharSearch() { } public void grabHTMLLinksSearch() { String html = ""; try { long milliSeconds1 = System.currentTimeMillis(); long milliSeconds2 = 0l; html = dataInputStream.readLine(); while (null != html) { milliSeconds2 = System.currentTimeMillis(); String firstS = html.substring( html.toLowerCase().indexOf("=") + 2, html.length()); mapForKeyValues.put(html.substring(0, html.indexOf("<") - 1) .toLowerCase(), firstS.substring(0, firstS.indexOf(" ") - 1)); html = dataInputStream.readLine(); } System.out.println("time took to search the keyword@@@@ " + (milliSeconds2 - milliSeconds1)); } catch (Exception e) { System.out.println("error when getting the data"); e.printStackTrace(); } finally { try { if (null != dataInputStream) { dataInputStream.close(); } } catch (Exception e) { e.printStackTrace(); } } } public ArrayList<String> search(String searchWord) { ArrayList<String> linkURLS = new ArrayList<String>(); String searchKey = searchWord.toLowerCase(); String[] searchKeyValues = searchKey.split(" "); int len = searchKeyValues.length; HashMap<String, String> hashMap = (HashMap<String, String>) localPool .get(); Set<String> keys = hashMap.keySet(); for (String key : keys) { int index = searchByChar.kmp(searchKey, key); if (key.length() >= 10) { if (-1 != index) {// rule 1 & 2 linkURLS.add(mapForKeyValues.get(key)); } } else if (key.equalsIgnoreCase(searchKey)) {// rule 3 
linkURLS.add(mapForKeyValues.get(key)); } else if (key.endsWith("_")) {// rule 5 if (-1 != index) { linkURLS.add(mapForKeyValues.get(key)); } } else if (len > 0) {// rule 4 for (int i = 0; i < len; i++) { if (searchKeyValues[i].equalsIgnoreCase(key)) { linkURLS.add(mapForKeyValues.get(key)); break; } } } } return linkURLS; } public static void main(String[] args) { ArrayList<String> linkURLS = getHtml .search("My Name Is jay_ patil_00_."); for (String value : linkURLS) { System.out.println(value); } } } --------------------------------------------------------------------------
// 2. Second java file searchByChar.java

/**
 * Knuth-Morris-Pratt substring search.
 */
public class searchByChar {

    /**
     * Builds the KMP failure table for {@code pattern}.
     *
     * <p>{@code next[0]} is {@code -1}; for {@code i > 0}, {@code next[i]} is
     * the pattern index to resume comparing from after a mismatch at index
     * {@code i}.
     *
     * @param pattern the pattern to preprocess
     * @return a table with one entry per pattern character; an empty array
     *         for an empty pattern
     */
    public static int[] prekmp(String pattern) {
        int[] next = new int[pattern.length()];
        if (next.length == 0) {
            // Nothing to fill in; writing next[0] here used to throw
            // ArrayIndexOutOfBoundsException.
            return next;
        }
        int i = 0;
        int j = -1;
        next[0] = -1;
        while (i < pattern.length() - 1) {
            while (j >= 0 && pattern.charAt(i) != pattern.charAt(j)) {
                j = next[j];
            }
            i++;
            j++;
            next[i] = j;
        }
        return next;
    }

    /**
     * Finds the first occurrence of {@code pattern} in {@code text}.
     *
     * @param text    the text to scan
     * @param pattern the substring to look for
     * @return the index of the first match, {@code 0} for an empty pattern
     *         (the same answer {@code String.indexOf} gives), or {@code -1}
     *         when there is no match
     */
    public static int kmp(String text, String pattern) {
        if (pattern.length() == 0) {
            return 0;
        }
        if (pattern.length() > text.length()) {
            // A pattern longer than the text cannot match; skip the table build.
            return -1;
        }
        int[] next = prekmp(pattern);
        int i = 0;
        int j = 0;
        while (i < text.length()) {
            // Fall back through the table until the characters agree or the
            // pattern has to restart (j == -1).
            while (j >= 0 && text.charAt(i) != pattern.charAt(j)) {
                j = next[j];
            }
            i++;
            j++;
            if (j == pattern.length()) {
                return i - pattern.length();
            }
        }
        return -1;
    }
}
// --------------------------------------------------------------------------
The contents of the text file look like this:
patil_00_ <A HREF="http://support.jay.com:8080/index.jsp" title="View the Supp" target=_blank class="table">patil_00_</A>
jay_ <A HREF="http://support.sac.com:8080/index.jsp" title="View the jsp" target=_blank class="link">jay_</A>
...........................
The third file, FileReader.java, reads the text file using DataInputStream and returns the dataInputStream.