Skip to content

Instantly share code, notes, and snippets.

@notriddle
Last active November 7, 2024 21:06
Show Gist options
  • Select an option

  • Save notriddle/aa8edfe3c1b0a4f5e777d6db179464a2 to your computer and use it in GitHub Desktop.

Select an option

Save notriddle/aa8edfe3c1b0a4f5e777d6db179464a2 to your computer and use it in GitHub Desktop.
const REGEX_SPECIALS = /[\$\(\)\*\+\.\/\?\[\\\]\^\{\|\}]/;
class EditDistanceCalculator {
constructor(string, limit) {
this.string = string;
this.current = [];
this.prev = [];
this.prevPrev = [];
this.matcherRegExp = new RegExp("^" + this.buildMatcherRegExp(string, limit) + "$");
}
buildMatcherRegExp(string, limit) {
if (limit <= 0) {
// need to escape specials
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions
return string.replace(REGEX_SPECIALS, x => `\\${x}`);
}
let result = "(";
let close = ")";
while (string !== "") {
// substitution | deletion
result += "(.?" + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
// insertion
result += "(." + this.buildMatcherRegExp(string, limit - 1) + ")|";
// transposition
if (string.length >= 2) {
result += "(" + (string[1] + string[0]).replace(REGEX_SPECIALS, x => `\\${x}`) +
this.buildMatcherRegExp(string.substring(2), limit - 1) + ")|";
}
// verbatim
result += "(" + string[0].replace(REGEX_SPECIALS, x => `\\${x}`) + "(";
close += "))";
string = string.substring(1);
}
for (let i = 0; i < limit; ++i) {
result += ".?"; // insertion
}
result += close;
return result;
}
/*
* This function was translated, mostly line-for-line, from
* https://github.com/rust-lang/rust/blob/ff4b772f805ec1e/compiler/rustc_span/src/edit_distance.rs
*
* The current implementation is the restricted Damerau-Levenshtein algorithm. It is restricted
* because it does not permit modifying characters that have already been transposed. The specific
* algorithm should not matter to the caller of the methods, which is why it is not noted in the
* documentation.
*/
calculate(b, limit) {
let a = this.string;
// Ensure that `b` is the shorter string, minimizing memory use.
if (a.length < b.length) {
const aTmp = a;
a = b;
b = aTmp;
}
const minDist = a.length - b.length;
// If we know the limit will be exceeded, we can return early.
if (minDist > limit) {
return limit + 1;
}
// Strip common prefix.
// We know that `b` is the shorter string, so we don't need to check
// `a.length`.
while (b.length > 0 && b[0] === a[0]) {
a = a.substring(1);
b = b.substring(1);
}
// Strip common suffix.
while (b.length > 0 && b[b.length - 1] === a[a.length - 1]) {
a = a.substring(0, a.length - 1);
b = b.substring(0, b.length - 1);
}
// If either string is empty, the distance is the length of the other.
// We know that `b` is the shorter string, so we don't need to check `a`.
if (b.length === 0) {
return minDist;
}
const aLength = a.length;
const bLength = b.length;
for (let i = 0; i <= bLength; ++i) {
this.current[i] = 0;
this.prev[i] = i;
this.prevPrev[i] = Number.MAX_VALUE;
}
// row by row
for (let i = 1; i <= aLength; ++i) {
this.current[0] = i;
const aIdx = i - 1;
// column by column
for (let j = 1; j <= bLength; ++j) {
const bIdx = j - 1;
// There is no cost to substitute a character with itself.
const substitutionCost = a[aIdx] === b[bIdx] ? 0 : 1;
this.current[j] = Math.min(
// deletion
this.prev[j] + 1,
// insertion
this.current[j - 1] + 1,
// substitution
this.prev[j - 1] + substitutionCost,
);
if ((i > 1) && (j > 1) && (a[aIdx] === b[bIdx - 1]) && (a[aIdx - 1] === b[bIdx])) {
// transposition
this.current[j] = Math.min(
this.current[j],
this.prevPrev[j - 2] + 1,
);
}
}
// Rotate the buffers, reusing the memory
const prevPrevTmp = this.prevPrev;
this.prevPrev = this.prev;
this.prev = this.current;
this.current = prevPrevTmp;
}
// `prev` because we already rotated the buffers.
const distance = this.prev[bLength];
return distance <= limit ? distance : (limit + 1);
}
}
function buildString(seed) {
let string = "";
let alphabet = "ABCD";
let seed_builder = seed;
while (seed_builder > 0) {
string += alphabet[seed_builder & 3];
seed_builder = seed_builder >> 2;
}
return string;
}
for (let limit = 0; limit < 4; limit += 1) {
console.log(limit);
for (let seed = 0; seed < 1024; seed += 1) {
let string1 = buildString(seed);
for (let seed2 = 0; seed2 < 1024; seed2 += 1) {
let string2 = buildString(seed2);
let calculator = new EditDistanceCalculator(string1, limit);
let matcherPasses = calculator.matcherRegExp.test(string2);
let calculatePasses = calculator.calculate(string2, limit) < limit + 1;
if (matcherPasses !== calculatePasses) {
console.log(calculator.matcherRegExp);
throw new Error("failed string1=" + string1 + " string2=" + string2 + " limit=" + limit + " matcherPasses=" + matcherPasses + " calculatePasses=" + calculatePasses);
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment