Skip to content

Instantly share code, notes, and snippets.

@jappy
Created March 11, 2012 19:19
Show Gist options
  • Select an option

  • Save jappy/2017732 to your computer and use it in GitHub Desktop.

Select an option

Save jappy/2017732 to your computer and use it in GitHub Desktop.

Revisions

  1. jappy revised this gist Mar 11, 2012. 1 changed file with 0 additions and 11 deletions.
    11 changes: 0 additions & 11 deletions MinimumEditDistance.java
    Original file line number Diff line number Diff line change
    @@ -153,17 +153,6 @@ public static void printAlignment(List<int[]> backtrace, CharSequence x, CharSeq
    }
    System.out.println("y:" + builder.toString());
    }

    public static String printBacktrace(List<int[]> backtrace) {
    StringBuilder builder = new StringBuilder();
    builder.append("[");
    for (int[] pair: backtrace) {
    builder.append('[').append(pair[0]).append(',').append(pair[1]).append("],");
    }
    builder.deleteCharAt(builder.length() - 1);
    builder.append("]");
    return builder.toString();
    }

    public static void test(CharSequence x,
    CharSequence y) {
  2. jappy revised this gist Mar 11, 2012. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions MinimumEditDistance.java
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,7 @@
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Collections;

    public class MinimumEditDistance {

    public interface CostFunction {
  3. jappy revised this gist Mar 11, 2012. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions MinimumEditDistance.java
    Original file line number Diff line number Diff line change
    @@ -59,10 +59,10 @@ public static int[][] distanceMatrix(CharSequence x,
    {
    int[][] d = new int[x.length()+1][y.length()+1];
    // initialize
    for (int i=0; i < d.length; i++)
    if (i > 0) d[i][0] = d[i-1][0] + deletion.cost(d, x, y, i, 0);
    for (int j=0; j < d[0].length; j++)
    if (j > 0) d[0][j] = d[0][j-1] + insertion.cost(d, x, y, 0, j);
    for (int i=1; i < d.length; i++)
    d[i][0] = d[i-1][0] + deletion.cost(d, x, y, i, 0);
    for (int j=1; j < d[0].length; j++)
    d[0][j] = d[0][j-1] + insertion.cost(d, x, y, 0, j);
    for (int i=1; i < d.length; i++) {
    for (int j=1; j < d[i].length; j++) {
    if (x.charAt(i - 1) == y.charAt(j - 1)) {
  4. jappy revised this gist Mar 11, 2012. 1 changed file with 4 additions and 2 deletions.
    6 changes: 4 additions & 2 deletions MinimumEditDistance.java
    Original file line number Diff line number Diff line change
    @@ -59,8 +59,10 @@ public static int[][] distanceMatrix(CharSequence x,
    {
    int[][] d = new int[x.length()+1][y.length()+1];
    // initialize
    for (int i=0; i < d.length; i++) d[i][0] = i;
    for (int j=0; j < d[0].length; j++) d[0][j] = j;
    for (int i=0; i < d.length; i++)
    if (i > 0) d[i][0] = d[i-1][0] + deletion.cost(d, x, y, i, 0);
    for (int j=0; j < d[0].length; j++)
    if (j > 0) d[0][j] = d[0][j-1] + insertion.cost(d, x, y, 0, j);
    for (int i=1; i < d.length; i++) {
    for (int j=1; j < d[i].length; j++) {
    if (x.charAt(i - 1) == y.charAt(j - 1)) {
  5. jappy created this gist Mar 11, 2012.
    189 changes: 189 additions & 0 deletions MinimumEditDistance.java
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,189 @@
    public class MinimumEditDistance {

    public interface CostFunction {
    public int cost(int[][] distanceMatrix, CharSequence x, CharSequence y, int i, int j);
    }

    public static final CostFunction ONE = new CostFunction() {
    public int cost(int[][] distanceMatrix, CharSequence x, CharSequence y, int i, int j) {
    return 1;
    }
    };

    public static final CostFunction TWO = new CostFunction() {
    public int cost(int[][] distanceMatrix, CharSequence x, CharSequence y, int i, int j) {
    return 2;
    }
    };


    public static int distance(CharSequence x,
    CharSequence y)
    {
    return distance(x, y, false);
    }


    public static int distance(CharSequence x,
    CharSequence y,
    boolean jurafskyLevenshtein)
    {
    int result = 0;
    int[][] d = null;
    if (jurafskyLevenshtein) {
    d = distanceMatrix(x, y, ONE, ONE, TWO);
    } else {
    d = distanceMatrix(x, y, ONE, ONE, ONE);
    }
    result = d[x.length()][y.length()];
    return result;
    }

    public static int distance(CharSequence x,
    CharSequence y,
    CostFunction deletion,
    CostFunction insertion,
    CostFunction substitution)
    {
    int result = 0;
    int[][] d = distanceMatrix(x, y, deletion, insertion, substitution);
    result = d[x.length()][y.length()];
    return result;
    }

    public static int[][] distanceMatrix(CharSequence x,
    CharSequence y,
    CostFunction deletion,
    CostFunction insertion,
    CostFunction substitution)
    {
    int[][] d = new int[x.length()+1][y.length()+1];
    // initialize
    for (int i=0; i < d.length; i++) d[i][0] = i;
    for (int j=0; j < d[0].length; j++) d[0][j] = j;
    for (int i=1; i < d.length; i++) {
    for (int j=1; j < d[i].length; j++) {
    if (x.charAt(i - 1) == y.charAt(j - 1)) {
    d[i][j] = d[i-1][j-1];
    } else {
    d[i][j] = min(d[i-1][j] + deletion.cost(d, x, y, i, j),
    d[i][j-1] + insertion.cost(d, x, y, i, j),
    d[i-1][j-1] + substitution.cost(d, x, y, i, j));
    }
    }
    }
    return d;
    }

    private static int min(int a, int b, int c) {
    return Math.min(Math.min(a, b), c);
    }

    public static int getMinEditDistance(int[][] distanceMatrix) {
    return distanceMatrix[distanceMatrix.length - 1][distanceMatrix[0].length - 1];
    }

    public static List<int[]> computeBacktrace(int[][] d) {
    List<int[]> backtrace = new ArrayList<int[]>();
    int i=d.length-1;
    int j=d[0].length-1;

    while (true) {
    backtrace.add(new int[] {i, j});
    if (i==1 && j==1) break;
    else if (i==1 && j > 1) j--;
    else if (j==1 && i > 1) i--;
    else {
    if (d[i-1][j] < d[i-1][j-1]) {
    i--;
    } else if (d[i][j-1] < d[i-1][j-1]) {
    j--;
    } else {
    i--;
    j--;
    }
    }
    }
    Collections.reverse(backtrace);

    return backtrace;
    }

    public static void printDistanceMatrix(int[][] d, CharSequence x, CharSequence y) {
    StringBuilder builder = new StringBuilder();
    builder.append("\t\t");
    for (int i=0; i < y.length(); i++) {
    builder.append(y.charAt(i)).append('\t');
    }
    builder.append('\n');
    for (int i=0; i < d.length; i++) {
    if (i == 0) builder.append("\t");
    else builder.append(x.charAt(i-1)).append("\t");
    for (int j=0; j < d[i].length; j++) {
    builder.append(d[i][j]).append('\t');
    }
    builder.append('\n');
    }
    System.out.println(builder);
    }

    public static void printAlignment(List<int[]> backtrace, CharSequence x, CharSequence y) {
    StringBuilder builder = new StringBuilder();
    for (int[] pair: backtrace) {
    builder.append(x.charAt(pair[0]-1));
    }
    System.out.println("x:" + builder.toString());

    builder = new StringBuilder();
    for (int[] pair: backtrace) {
    if (x.charAt(pair[0]-1) == y.charAt(pair[1]-1)) builder.append('|');
    else builder.append(' ');
    }
    System.out.println(" " + builder.toString());

    builder = new StringBuilder();
    for (int[] pair: backtrace) {
    builder.append(y.charAt(pair[1]-1));
    }
    System.out.println("y:" + builder.toString());
    }

    public static String printBacktrace(List<int[]> backtrace) {
    StringBuilder builder = new StringBuilder();
    builder.append("[");
    for (int[] pair: backtrace) {
    builder.append('[').append(pair[0]).append(',').append(pair[1]).append("],");
    }
    builder.deleteCharAt(builder.length() - 1);
    builder.append("]");
    return builder.toString();
    }

    public static void test(CharSequence x,
    CharSequence y) {
    test(x, y, ONE, ONE, TWO);
    }

    public static void test(CharSequence x,
    CharSequence y,
    CostFunction deletion,
    CostFunction insertion,
    CostFunction substitution) {
    System.out.println("*********** " + x + " vs. " + y + " ************\n");
    int[][] distanceMatrix = distanceMatrix(x, y, deletion, insertion, substitution);
    int minEditDistance = getMinEditDistance(distanceMatrix);
    System.out.println("min edit distance: " + minEditDistance + "\n");
    printDistanceMatrix(distanceMatrix, x, y);
    List<int[]> backtrace = computeBacktrace(distanceMatrix);
    printAlignment(backtrace, x, y);
    System.out.println('\n');
    }

    public static void main(String[] args) {
    test("hello", "yellow");
    test("sitting", "kitten");
    test("Sunday", "Saturday");
    test("comb", "love");
    test("intention", "execution");
    }
    }