Skip to content

Instantly share code, notes, and snippets.

@notriddle
Last active November 7, 2024 21:06
Show Gist options
  • Select an option

  • Save notriddle/aa8edfe3c1b0a4f5e777d6db179464a2 to your computer and use it in GitHub Desktop.

Select an option

Save notriddle/aa8edfe3c1b0a4f5e777d6db179464a2 to your computer and use it in GitHub Desktop.

Revisions

  1. notriddle revised this gist Nov 7, 2024. 1 changed file with 9 additions and 8 deletions.
    17 changes: 9 additions & 8 deletions edit-distance-tester.js
    Original file line number Diff line number Diff line change
    @@ -25,29 +25,30 @@ class EditDistanceCalculator {
    // Fast path were we already know the correct regex
    const c = esc[i];
    current[i] = dist === 2 ?
    ("((" + c + ".{0,2})|(." + c + ".?)|(.{0,2}" + c + "?))") :
    ("((" + c + ".)|(." + c + ")|.?)");
    ("(?:(:?" + c + ".{0,2})|(?:." + c + ".?)|(?:.{0,2}" + c + "?))") :
    ("(?:(:?" + c + ".)|(?:." + c + ")|.?)");
    continue;
    }
    let result = "(";
    let close = ")";
    for (let j = i; j < l; j += 1) {
    // substitution | deletion
    result += "(.?" + prev[j + 1] + ")|";
    result += "(?:.?" + prev[j + 1] + ")|";
    // insertion
    result += "(." + prev[j] + ")|";
    result += "(?:." + prev[j] + ")|";
    // transposition
    if (j + 1 < l && string[j + 1] !== string[j]) {
    result += "(" + esc[j + 1] + esc[j] + prev[j + 2] + ")|";
    result += "(?:" + esc[j + 1] + esc[j] + prev[j + 2] + ")|";
    }
    // verbatim
    result += "(" + esc[j] + "(";
    result += "(?:" + esc[j] + "(?:";
    close += "))";
    }
    // insertion after verbatim
    if (dist > 1) {
    result += ".{0," + dist + "}"; // insertion
    result += ".{0," + dist + "}";
    } else {
    result += ".?"; // insertion
    result += ".?";
    }
    result += close;
    current[i] = result;
  2. notriddle revised this gist Nov 7, 2024. 1 changed file with 14 additions and 4 deletions.
    18 changes: 14 additions & 4 deletions edit-distance-tester.js
    Original file line number Diff line number Diff line change
    @@ -13,11 +13,22 @@ class EditDistanceCalculator {
    let prev = [];
    let current = [];
    const l = string.length;
    for (let i = 0; i <= l; i += 1) {
    const esc = [];
    for (let i = 0; i < l; i += 1) {
    esc[i] = string[i].replace(REGEX_SPECIALS, x => `\\${x}`);
    prev[i] = string.substring(i).replace(REGEX_SPECIALS, x => `\\${x}`);
    }
    prev[l] = "";
    for (let dist = 1; dist <= limit; dist += 1) {
    for (let i = 0; i <= l; i += 1) {
    if (dist <= 2 && l - i === 1) {
    // Fast path were we already know the correct regex
    const c = esc[i];
    current[i] = dist === 2 ?
    ("((" + c + ".{0,2})|(." + c + ".?)|(.{0,2}" + c + "?))") :
    ("((" + c + ".)|(." + c + ")|.?)");
    continue;
    }
    let result = "(";
    let close = ")";
    for (let j = i; j < l; j += 1) {
    @@ -27,11 +38,10 @@ class EditDistanceCalculator {
    result += "(." + prev[j] + ")|";
    // transposition
    if (j + 1 < l && string[j + 1] !== string[j]) {
    result += "(" + (string[j + 1] + string[j]).replace(REGEX_SPECIALS, x => `\\${x}`) +
    prev[j + 2] + ")|";
    result += "(" + esc[j + 1] + esc[j] + prev[j + 2] + ")|";
    }
    // verbatim
    result += "(" + string[j].replace(REGEX_SPECIALS, x => `\\${x}`) + "(";
    result += "(" + esc[j] + "(";
    close += "))";
    }
    if (dist > 1) {
  3. notriddle revised this gist Nov 7, 2024. 1 changed file with 37 additions and 25 deletions.
    62 changes: 37 additions & 25 deletions edit-distance-tester.js
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,5 @@
    // need to escape specials
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions
    const REGEX_SPECIALS = /[\$\(\)\*\+\.\/\?\[\\\]\^\{\|\}]/;
    class EditDistanceCalculator {
    constructor(string, limit) {
    @@ -8,33 +10,43 @@ class EditDistanceCalculator {
    this.matcherRegExp = new RegExp("^" + this.buildMatcherRegExp(string, limit) + "$");
    }
    buildMatcherRegExp(string, limit) {
    if (limit <= 0) {
    // need to escape specials
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions
    return string.replace(REGEX_SPECIALS, x => `\\${x}`);
    let prev = [];
    let current = [];
    const l = string.length;
    for (let i = 0; i <= l; i += 1) {
    prev[i] = string.substring(i).replace(REGEX_SPECIALS, x => `\\${x}`);
    }
    let result = "(";
    let close = ")";
    while (string !== "") {
    // substitution | deletion
    result += "(.?" + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
    // insertion
    result += "(." + this.buildMatcherRegExp(string, limit - 1) + ")|";
    // transposition
    if (string.length >= 2) {
    result += "(" + (string[1] + string[0]).replace(REGEX_SPECIALS, x => `\\${x}`) +
    this.buildMatcherRegExp(string.substring(2), limit - 1) + ")|";
    for (let dist = 1; dist <= limit; dist += 1) {
    for (let i = 0; i <= l; i += 1) {
    let result = "(";
    let close = ")";
    for (let j = i; j < l; j += 1) {
    // substitution | deletion
    result += "(.?" + prev[j + 1] + ")|";
    // insertion
    result += "(." + prev[j] + ")|";
    // transposition
    if (j + 1 < l && string[j + 1] !== string[j]) {
    result += "(" + (string[j + 1] + string[j]).replace(REGEX_SPECIALS, x => `\\${x}`) +
    prev[j + 2] + ")|";
    }
    // verbatim
    result += "(" + string[j].replace(REGEX_SPECIALS, x => `\\${x}`) + "(";
    close += "))";
    }
    if (dist > 1) {
    result += ".{0," + dist + "}"; // insertion
    } else {
    result += ".?"; // insertion
    }
    result += close;
    current[i] = result;
    }
    // verbatim
    result += "(" + string[0].replace(REGEX_SPECIALS, x => `\\${x}`) + "(";
    close += "))";
    string = string.substring(1);
    const tmp = prev;
    prev = current;
    current = prev;
    }
    for (let i = 0; i < limit; ++i) {
    result += ".?"; // insertion
    }
    result += close;
    return result;
    return prev[0];
    }
    /*
    * This function was translated, mostly line-for-line, from
    @@ -157,4 +169,4 @@ for (let limit = 0; limit < 4; limit += 1) {
    }
    }
    }
    }
    }
  4. notriddle revised this gist Nov 7, 2024. 1 changed file with 2 additions and 4 deletions.
    6 changes: 2 additions & 4 deletions edit-distance-tester.js
    Original file line number Diff line number Diff line change
    @@ -16,10 +16,8 @@ class EditDistanceCalculator {
    let result = "(";
    let close = ")";
    while (string !== "") {
    // substitution
    result += "(." + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
    // deletion
    result += "(" + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
    // substitution | deletion
    result += "(.?" + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
    // insertion
    result += "(." + this.buildMatcherRegExp(string, limit - 1) + ")|";
    // transposition
  5. notriddle created this gist Nov 7, 2024.
    162 changes: 162 additions & 0 deletions edit-distance-tester.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,162 @@
    const REGEX_SPECIALS = /[\$\(\)\*\+\.\/\?\[\\\]\^\{\|\}]/;
    class EditDistanceCalculator {
    constructor(string, limit) {
    this.string = string;
    this.current = [];
    this.prev = [];
    this.prevPrev = [];
    this.matcherRegExp = new RegExp("^" + this.buildMatcherRegExp(string, limit) + "$");
    }
    buildMatcherRegExp(string, limit) {
    if (limit <= 0) {
    // need to escape specials
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions
    return string.replace(REGEX_SPECIALS, x => `\\${x}`);
    }
    let result = "(";
    let close = ")";
    while (string !== "") {
    // substitution
    result += "(." + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
    // deletion
    result += "(" + this.buildMatcherRegExp(string.substring(1), limit - 1) + ")|";
    // insertion
    result += "(." + this.buildMatcherRegExp(string, limit - 1) + ")|";
    // transposition
    if (string.length >= 2) {
    result += "(" + (string[1] + string[0]).replace(REGEX_SPECIALS, x => `\\${x}`) +
    this.buildMatcherRegExp(string.substring(2), limit - 1) + ")|";
    }
    // verbatim
    result += "(" + string[0].replace(REGEX_SPECIALS, x => `\\${x}`) + "(";
    close += "))";
    string = string.substring(1);
    }
    for (let i = 0; i < limit; ++i) {
    result += ".?"; // insertion
    }
    result += close;
    return result;
    }
    /*
    * This function was translated, mostly line-for-line, from
    * https://github.com/rust-lang/rust/blob/ff4b772f805ec1e/compiler/rustc_span/src/edit_distance.rs
    *
    * The current implementation is the restricted Damerau-Levenshtein algorithm. It is restricted
    * because it does not permit modifying characters that have already been transposed. The specific
    * algorithm should not matter to the caller of the methods, which is why it is not noted in the
    * documentation.
    */
    calculate(b, limit) {
    let a = this.string;
    // Ensure that `b` is the shorter string, minimizing memory use.
    if (a.length < b.length) {
    const aTmp = a;
    a = b;
    b = aTmp;
    }

    const minDist = a.length - b.length;
    // If we know the limit will be exceeded, we can return early.
    if (minDist > limit) {
    return limit + 1;
    }

    // Strip common prefix.
    // We know that `b` is the shorter string, so we don't need to check
    // `a.length`.
    while (b.length > 0 && b[0] === a[0]) {
    a = a.substring(1);
    b = b.substring(1);
    }
    // Strip common suffix.
    while (b.length > 0 && b[b.length - 1] === a[a.length - 1]) {
    a = a.substring(0, a.length - 1);
    b = b.substring(0, b.length - 1);
    }

    // If either string is empty, the distance is the length of the other.
    // We know that `b` is the shorter string, so we don't need to check `a`.
    if (b.length === 0) {
    return minDist;
    }

    const aLength = a.length;
    const bLength = b.length;

    for (let i = 0; i <= bLength; ++i) {
    this.current[i] = 0;
    this.prev[i] = i;
    this.prevPrev[i] = Number.MAX_VALUE;
    }

    // row by row
    for (let i = 1; i <= aLength; ++i) {
    this.current[0] = i;
    const aIdx = i - 1;

    // column by column
    for (let j = 1; j <= bLength; ++j) {
    const bIdx = j - 1;

    // There is no cost to substitute a character with itself.
    const substitutionCost = a[aIdx] === b[bIdx] ? 0 : 1;

    this.current[j] = Math.min(
    // deletion
    this.prev[j] + 1,
    // insertion
    this.current[j - 1] + 1,
    // substitution
    this.prev[j - 1] + substitutionCost,
    );

    if ((i > 1) && (j > 1) && (a[aIdx] === b[bIdx - 1]) && (a[aIdx - 1] === b[bIdx])) {
    // transposition
    this.current[j] = Math.min(
    this.current[j],
    this.prevPrev[j - 2] + 1,
    );
    }
    }

    // Rotate the buffers, reusing the memory
    const prevPrevTmp = this.prevPrev;
    this.prevPrev = this.prev;
    this.prev = this.current;
    this.current = prevPrevTmp;
    }

    // `prev` because we already rotated the buffers.
    const distance = this.prev[bLength];
    return distance <= limit ? distance : (limit + 1);
    }
    }

    function buildString(seed) {
    let string = "";
    let alphabet = "ABCD";
    let seed_builder = seed;
    while (seed_builder > 0) {
    string += alphabet[seed_builder & 3];
    seed_builder = seed_builder >> 2;
    }
    return string;
    }

    for (let limit = 0; limit < 4; limit += 1) {
    console.log(limit);
    for (let seed = 0; seed < 1024; seed += 1) {
    let string1 = buildString(seed);
    for (let seed2 = 0; seed2 < 1024; seed2 += 1) {
    let string2 = buildString(seed2);
    let calculator = new EditDistanceCalculator(string1, limit);
    let matcherPasses = calculator.matcherRegExp.test(string2);
    let calculatePasses = calculator.calculate(string2, limit) < limit + 1;
    if (matcherPasses !== calculatePasses) {
    console.log(calculator.matcherRegExp);
    throw new Error("failed string1=" + string1 + " string2=" + string2 + " limit=" + limit + " matcherPasses=" + matcherPasses + " calculatePasses=" + calculatePasses);
    }
    }
    }
    }