java - Longest Common Substring without Splitting words -


i trying implement modified version of longest common substring not want split words.

eg:

string s1 = "hi there how you"; string s2 = "hi there how ar"; 

so output should be: o/p: hi there how.

i first calculating longest common substring , filtering words split , below code same:

private static void longestcommonsubstring(string s1, string s2) {         int start = 0;         int max = 0;         (int = 0; < s1.length(); i++) {             (int j = 0; j < s2.length(); j++) {                 int x = 0;                 while (s1.charat(i + x) == s2.charat(j + x)) {                     x++;                     if (((i + x) >= s1.length()) || ((j + x) >= s2.length()))                         break;                 }                 if (x > max) {                     max = x;                     start = i;                 }             }         }         system.out.println("s1 " + s1 + " s2 " + s2 + " " + start + " " + max);         system.out.println("ans " + s1.substring(start, (start+max)));           if(start != 0){             if((s1.charat(start-1) == ' ')){                 if(start+max != s1.length()){                     if((s1.charat(start+max) == ' ')){                         system.out.println(s1.substring(start, (start + max)));                     }                 }             }             else if((s1.charat(start+max) == ' ')){                 int index = s1.indexof(' ', start);                 system.out.println(s1.substring(index+1, (start + max)));             }             else{                 int index = s1.indexof(' ', start);                 int lastindex = s1.lastindexof(' ', (start+max));                 if(index+1 != lastindex+1){                     system.out.println(s1.substring(index+1, lastindex));                 }                 else{                     system.out.println(" ");                 }             }         }         else if(start == 0 && start+max != s1.length() && (s1.charat(start+max) == ' ')){             system.out.println(s1.substring(start, (start + max)));         }         else if(start+max != s1.length()){             int index = s1.lastindexof(' ', (start+max));             system.out.println(s1.substring(start, index));         }         else{             system.out.println(s1.substring(start, (start + max)));         }       } 

but failing cases like:

string s1 = "hi there how you"; string s2 = "i ere ho ar you"; 

where output should have been "you" blank, because longest common substring "ere ho".

thanks help.

based on karthik, , after bna's comment character-based answer flawed:

public static arraylist<string> stringtotokens(string s) {     arraylist<string> al = new arraylist<string>();     stringbuilder word = new stringbuilder();     boolean inword = !s.isempty() && character.isletter(s.charat(0));     (int i=0; i<s.length(); i++) {         char c = s.charat(i);         if (character.isletter(c)) {             word.append(c);         } else if (inword) {             if (word.length() > 0) {                 al.add(word.tostring());                 word.setlength(0);             }             al.add(""+c);         } else {             al.add(""+c);         }     }     if (word.length() > 0) {         al.add(word.tostring());     }     return al; }  public static string longestcommonwithwords(string sa, string sb) {     arraylist<string> = stringtotokens(sa);     arraylist<string> b = stringtotokens(sb);     int m[][] = new int[a.size()][b.size()];     int bestlength = 0;     hashset<integer> bestset = new hashset<integer>();     (int i=0; i<a.size(); i++) {         (int j=0; j<b.size(); j++) {             if (a.get(i).equals(b.get(j))) {                 m[i][j] = (i==0 || j==0) ? 1 : m[i-1][j-1]+1;                 if (m[i][j] > bestlength) {                     bestlength = m[i][j];                     bestset.clear();                     bestset.add(i);                 } else if (m[i][j] == bestlength) {                     bestset.add(i);                 }             } else {                 m[i][j] = 0;             }         }     }     // return first result (or empty string if none)     stringbuilder result = new stringbuilder();     if (bestlength > 0) {         int end = bestset.iterator().next();         int start = end - bestlength;         (int i=start; i<end; i++) {             result.append(a.get(i+1));         }     }     return result.tostring(); } 

tokenization straightforward (a stringtokenizer have worked too, version handles strange characters better). lcs algorithm comes straight pseudocode in https://en.wikipedia.org/wiki/longest_common_substring_problem#pseudocode


Comments

Popular posts from this blog

How to provide Authorization & Authentication using Asp.net, C#? -

toolbar - How to add link to user registration inside toobar in admin joomla 3 custom component -

How to use Authorization & Authentication in Asp.net, C#? -