Last active
July 15, 2017 10:44
-
-
Save fmoessbauer/23c77ceb3d817816e3d9e86561644d55 to your computer and use it in GitHub Desktop.
Implementation of the Wagner-Fischer algorithm to calculate the edit distance between two strings. See also https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Notice: Requires C++11 support | |
| */ | |
| #include <vector> | |
| #include <string> | |
| #include <algorithm> | |
| /** | |
| * Implementation of the Wagner-Fischer algorithm | |
| * closely following https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm | |
| */ | |
| int EditDistance( | |
| const std::string & s, | |
| const std::string & t) | |
| { | |
| auto m = s.length(); | |
| auto n = t.length(); | |
| std::vector<int> d((m+1)*(n+1)); | |
| for(auto i=0;i<=m;++i){ | |
| d[i*(n+1)] = i; | |
| } | |
| for(auto j=0;j<=n;++j){ | |
| d[j] = j; | |
| } | |
| for(auto j=1; j<=n; ++j){ | |
| for(auto i=1; i<=m; ++i){ | |
| if(s[i-1] == t[j-1]) | |
| d[i*(n+1) + j] = d[(i-1)*(n+1)+(j-1)]; // no operation required | |
| else { | |
| d[i*(n+1)+j] = std::min({ | |
| d[(i-1)*(n+1)+j] + 1, // a deletion | |
| d[i*(n+1)+j-1] + 1, // an insertion | |
| d[(i-1)*(n+1)+j-1] + 1 // a substitution | |
| }); | |
| } | |
| } | |
| } | |
| return *(d.end()-1); | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| # Implementation of the Wagner-Fischer algorithm | |
| # closely following https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm | |
| def EditDistance(s, t): | |
| # For all i and j, d[i,j] will hold the Levenshtein distance between | |
| # the first i characters of s and the first j characters of t. | |
| # Note that d has (m+1) x (n+1) values. | |
| m = len(s) | |
| n = len(t) | |
| d = np.zeros(shape=(m+1, n+1), dtype=int) | |
| # the distance of any first string to an empty second string | |
| # (transforming the string of the first i characters of s into | |
| # the empty string requires i deletions) | |
| d[:, 0] = range(0,m+1) | |
| # the distance of any second string to an empty first string | |
| d[0, :] = range(0,n+1) | |
| for j in range(1, n+1): | |
| for i in range(1, m+1): | |
| if s[i-1] == t[j-1]: | |
| d[i, j] = d[i-1, j-1] # no operation required | |
| else: | |
| d[i, j] = min( | |
| d[i-1, j] + 1, # a deletion | |
| d[i, j-1] + 1, # an insertion | |
| d[i-1, j-1] + 1 # a substitution | |
| ) | |
| return d[m,n] |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage:
Python
C++