DarkMatter in Cyberspace
  • Home
  • Categories
  • Tags
  • Archives

基于字节比对的KMP字符串搜索算法


package kmpclient; import java.io.UnsupportedEncodingException; import java.util.LinkedList; public class ByteKMP { private static final int R = 255; private static final String DEFAULT_CHARSET = "UTF-8"; private final int[][] dfa; private final int patLen; private int curMatchPos = 0; public ByteKMP(byte[] pat) throws UnsupportedEncodingException { patLen = pat.length; // build DFA from pattern int M = pat.length; dfa = new int[R][M]; dfa[pat[0] & 0xff][0] = 1; for (int X = 0, j = 1; j < M; j++) { for (int c = 0; c < R; c++) dfa[c][j] = dfa[c][X]; // Copy mismatch cases. dfa[pat[j] & 0xff][j] = j + 1; // Set match case. X = dfa[pat[j] & 0xff][X]; // Update restart state. } } public int search(byte[] msg) { int M = patLen; int N = msg.length; int i, j; for (i = 0, j = curMatchPos; i < N && j < M; i++) { j = dfa[msg[i] & 0xff][j]; } if (j == M) { curMatchPos = 0; return i - M; // found } curMatchPos = j; return N; // not found } public static void main(String[] args) throws UnsupportedEncodingException { int a = -27 % 255; testEnglishStr(); testChineseStr(); } private static void testChineseStr() throws UnsupportedEncodingException { LinkedList msgs = new LinkedList(); msgs.offer("今天是个好日子".getBytes(DEFAULT_CHARSET)); msgs.offer("厉害不?子= KM= KMP = 真厉害!".getBytes(DEFAULT_CHARSET)); // msgs.offer("= KMP = 真厉害!".getBytes(DEFAULT_CHARSET)); String pat = "子= KM"; // e5, ad, 90, ... [-27, -83, -112, 61, 32, 75, 77] ByteKMP bkmp = new ByteKMP(pat.getBytes(DEFAULT_CHARSET)); int pos = 9999; byte[] target = msgs.poll(); while (target != null) { pos = bkmp.search(target); if (pos < target.length) { break; } target = msgs.poll(); } if (target == null) { System.out.println("cannot find " + pat + " in msgs."); } else { System.out.println("find pat at " + pos + " in <" + new String(target) + ">"); } } private static void testEnglishStr() throws UnsupportedEncodingException { LinkedList msgs = new LinkedList(); msgs.offer("welcome lonely logoout".getBytes(DEFAULT_CHARSET)); msgs.offer("to flog".getBytes(DEFAULT_CHARSET)); msgs.offer("into a fog".getBytes(DEFAULT_CHARSET)); String pat = "a f"; ByteKMP bkmp = new ByteKMP(pat.getBytes(DEFAULT_CHARSET)); int pos = 9999; byte[] target = msgs.poll(); while (target != null) { pos = bkmp.search(target); if (pos < target.length) { break; } target = msgs.poll(); } if (target == null) { System.out.println("cannot find " + pat + " in msgs."); } else { System.out.println("find pat at " + pos + " in <" + new String(target) + ">"); } } }



Published

Feb 28, 2013

Last Updated

Feb 28, 2013

Category

Tech

Tags

  • Java 106
  • kmp 1

Contact

  • Powered by Pelican. Theme: Elegant by Talha Mansoor