mirror of
https://github.com/plexusorg/Plex-FAWE.git
synced 2025-06-16 22:03:53 +00:00
Improved item/block name matching.
This commit is contained in:
@ -163,4 +163,111 @@ public class StringUtil {
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Find the Levenshtein distance between two Strings.</p>
|
||||
*
|
||||
* <p>This is the number of changes needed to change one String into
|
||||
* another, where each change is a single character modification (deletion,
|
||||
* insertion or substitution).</p>
|
||||
*
|
||||
* <p>The previous implementation of the Levenshtein distance algorithm
|
||||
* was from <a href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a></p>
|
||||
*
|
||||
* <p>Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError
|
||||
* which can occur when my Java implementation is used with very large strings.<br>
|
||||
* This implementation of the Levenshtein distance algorithm
|
||||
* is from <a href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a></p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtil.getLevenshteinDistance(null, *) = IllegalArgumentException
|
||||
* StringUtil.getLevenshteinDistance(*, null) = IllegalArgumentException
|
||||
* StringUtil.getLevenshteinDistance("","") = 0
|
||||
* StringUtil.getLevenshteinDistance("","a") = 1
|
||||
* StringUtil.getLevenshteinDistance("aaapppp", "") = 7
|
||||
* StringUtil.getLevenshteinDistance("frog", "fog") = 1
|
||||
* StringUtil.getLevenshteinDistance("fly", "ant") = 3
|
||||
* StringUtil.getLevenshteinDistance("elephant", "hippo") = 7
|
||||
* StringUtil.getLevenshteinDistance("hippo", "elephant") = 7
|
||||
* StringUtil.getLevenshteinDistance("hippo", "zzzzzzzz") = 8
|
||||
* StringUtil.getLevenshteinDistance("hello", "hallo") = 1
|
||||
* </pre>
|
||||
*
|
||||
* @param s the first String, must not be null
|
||||
* @param t the second String, must not be null
|
||||
* @return result distance
|
||||
* @throws IllegalArgumentException if either String input <code>null</code>
|
||||
*/
|
||||
public static int getLevenshteinDistance(String s, String t) {
|
||||
if (s == null || t == null) {
|
||||
throw new IllegalArgumentException("Strings must not be null");
|
||||
}
|
||||
|
||||
/*
|
||||
* The difference between this impl. and the previous is that, rather
|
||||
* than creating and retaining a matrix of size s.length()+1 by
|
||||
* t.length()+1, we maintain two single-dimensional arrays of length
|
||||
* s.length()+1. The first, d, is the 'current working' distance array
|
||||
* that maintains the newest distance cost counts as we iterate through
|
||||
* the characters of String s. Each time we increment the index of
|
||||
* String t we are comparing, d is copied to p, the second int[]. Doing
|
||||
* so allows us to retain the previous cost counts as required by the
|
||||
* algorithm (taking the minimum of the cost count to the left, up one,
|
||||
* and diagonally up and to the left of the current cost count being
|
||||
* calculated). (Note that the arrays aren't really copied anymore, just
|
||||
* switched...this is clearly much better than cloning an array or doing
|
||||
* a System.arraycopy() each time through the outer loop.)
|
||||
*
|
||||
* Effectively, the difference between the two implementations is this
|
||||
* one does not cause an out of memory condition when calculating the LD
|
||||
* over two very large strings.
|
||||
*/
|
||||
|
||||
int n = s.length(); // length of s
|
||||
int m = t.length(); // length of t
|
||||
|
||||
if (n == 0) {
|
||||
return m;
|
||||
} else if (m == 0) {
|
||||
return n;
|
||||
}
|
||||
|
||||
int p[] = new int[n + 1]; // 'previous' cost array, horizontally
|
||||
int d[] = new int[n + 1]; // cost array, horizontally
|
||||
int _d[]; // placeholder to assist in swapping p and d
|
||||
|
||||
// indexes into strings s and t
|
||||
int i; // iterates through s
|
||||
int j; // iterates through t
|
||||
|
||||
char t_j; // jth character of t
|
||||
|
||||
int cost; // cost
|
||||
|
||||
for (i = 0; i <= n; i++) {
|
||||
p[i] = i;
|
||||
}
|
||||
|
||||
for (j = 1; j <= m; j++) {
|
||||
t_j = t.charAt(j - 1);
|
||||
d[0] = j;
|
||||
|
||||
for (i = 1; i <= n; i++) {
|
||||
cost = s.charAt(i - 1) == t_j ? 0 : 1;
|
||||
// minimum of cell to the left+1, to the top+1, diagonally left
|
||||
// and up +cost
|
||||
d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1]
|
||||
+ cost);
|
||||
}
|
||||
|
||||
// copy current distance counts to 'previous row' distance counts
|
||||
_d = p;
|
||||
p = d;
|
||||
d = _d;
|
||||
}
|
||||
|
||||
// our last action in the above loop was to switch d and p, so p now
|
||||
// actually has the most recent cost counts
|
||||
return p[n];
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user