Introduction to Levenshtein distance
Last Updated :
31 Jan, 2024
Levenshtein distance is a measure of the difference between two strings: it is the minimum number of single-character insertion, deletion and substitution operations needed to transform one string into the other. The smaller the distance, the more similar the strings are.
Operations in Levenshtein distance are:
- Insertion: Adding a character to string A.
- Deletion: Removing a character from string A.
- Replacement: Replacing a character in string A with another character.
Let’s look at an example: String A, “kitten”, needs to be converted into String B, “sitting”, so we need to determine the minimum number of operations required:
- kitten → sitten (substitution of “s” for “k”)
- sitten → sittin (substitution of “i” for “e”)
- sittin → sitting (insertion of “g” at the end).
In this case it took three operations to do this, so the Levenshtein distance is 3.
- Upper and lower bounds: The Levenshtein distance is always non-negative, and it is zero if and only if the two strings are identical. For two strings of lengths m and n, the distance is at most max(m, n): in the worst case every character of the shorter string is substituted and the remaining characters of the longer string are inserted or deleted.
Applications of Levenshtein distance:
The Levenshtein distance has various applications in various fields such as:
- Autocorrect Algorithms: Text editors and messaging applications use the Levenshtein distance in their autocorrect features such as gboard, swift keyboard, etc.
- Data cleaning: It is widely used in the process of data cleaning and normalization task to reduce redundancy and identify similar records in the data mining process.
- Data clustering and classification: To identify similar records and cluster them is clustering while identifying similar records and providing them with class labels is classification
Relationship with other edit distance metrics:
Let’s see how Levenshtein distance is different from other distance metrics
- Damerau-Levenshtein distance: It is similar to the Levenshtein distance, but it just also allows transpositions as an additional operation making it 4 operations.
- Hamming distance: It can only be applied to strings of equal length; it measures the number of positions at which the corresponding characters are different.
Now let’s see its implementation using different approaches:
1) Levenshtein distance using a recursive approach
In the recursive technique, we calculate the Levenshtein distance with a simple recursive function. It compares the last characters of the two string prefixes and, when they differ, recursively tries an insertion, a removal, and a replacement, keeping the cheapest option.
Below is the implementation for the above idea:
C++
#include <bits/stdc++.h>
using namespace std;
// Returns the Levenshtein distance between the first m characters of str1
// and the first n characters of str2, using plain (non-memoised) recursion.
int levenshteinRecursive(const string& str1, const string& str2, int m, int n)
{
    // An empty prefix can only be matched by inserting/deleting every
    // character of the other prefix.
    if (m == 0) {
        return n;
    }
    if (n == 0) {
        return m;
    }

    // Cost of aligning the two last characters with each other.
    const int diagonal = levenshteinRecursive(str1, str2, m - 1, n - 1);
    if (str1[m - 1] == str2[n - 1]) {
        // Last characters agree: no edit is charged for them.
        return diagonal;
    }

    // Last characters differ: pay one edit and take the cheapest of
    // insertion, deletion and substitution.
    const int insertion = levenshteinRecursive(str1, str2, m, n - 1);
    const int deletion = levenshteinRecursive(str1, str2, m - 1, n);
    return 1 + min({ insertion, deletion, diagonal });
}
// Demo driver: prints the distance between "kitten" and "sitting".
int main()
{
    const string source = "kitten";
    const string target = "sitting";

    // The recursive routine works on prefixes, so start from the full lengths.
    cout << "Levenshtein Distance: "
         << levenshteinRecursive(source, target, source.length(),
                                 target.length())
         << endl;
    return 0;
}
|
Java
import java.io.*;
public class Solution {
    /**
     * Recursively computes the Levenshtein distance between the first
     * {@code m} characters of {@code str1} and the first {@code n}
     * characters of {@code str2}.
     *
     * @param str1 first string
     * @param str2 second string
     * @param m    number of leading characters of str1 to consider
     * @param n    number of leading characters of str2 to consider
     * @return the minimum number of insertions, deletions and substitutions
     */
    public static int levenshteinRecursive(String str1,
                                           String str2, int m, int n) {
        // An empty prefix costs as many edits as the other prefix is long.
        if (m == 0) {
            return n;
        }
        if (n == 0) {
            return m;
        }

        int diagonal = levenshteinRecursive(str1, str2, m - 1, n - 1);
        if (str1.charAt(m - 1) == str2.charAt(n - 1)) {
            // Matching last characters add no cost.
            return diagonal;
        }

        int insertion = levenshteinRecursive(str1, str2, m, n - 1);
        int deletion = levenshteinRecursive(str1, str2, m - 1, n);
        int cheapest = Math.min(insertion, Math.min(deletion, diagonal));
        return cheapest + 1;
    }

    /** Demo driver: prints the distance between "kitten" and "sitting". */
    public static void main(String[] args) {
        String source = "kitten";
        String target = "sitting";
        int result = levenshteinRecursive(source, target,
                                          source.length(), target.length());
        System.out.println("Levenshtein Distance: " + result);
    }
}
|
Python3
def levenshteinRecursive(str1, str2, m, n):
    """Return the Levenshtein distance between str1[:m] and str2[:n].

    Plain recursion without memoisation: each call with differing last
    characters branches into three sub-calls.
    """
    # An empty prefix costs as many edits as the other prefix is long.
    if m == 0:
        return n
    if n == 0:
        return m
    # Matching last characters add no cost.
    if str1[m - 1] == str2[n - 1]:
        return levenshteinRecursive(str1, str2, m - 1, n - 1)
    # Otherwise pay one edit and take the cheapest of the three options.
    return 1 + min(
        levenshteinRecursive(str1, str2, m, n - 1),      # insertion
        levenshteinRecursive(str1, str2, m - 1, n),      # deletion
        levenshteinRecursive(str1, str2, m - 1, n - 1),  # substitution
    )
# Demo: distance between "kitten" and "sitting", starting from the full lengths.
str1, str2 = "kitten", "sitting"
distance = levenshteinRecursive(str1, str2, len(str1), len(str2))
print("Levenshtein Distance:", distance)
|
C#
using System;
class Program
{
    // Recursively computes the Levenshtein distance between the first m
    // characters of str1 and the first n characters of str2.
    static int LevenshteinRecursive(string str1, string str2, int m, int n)
    {
        // An empty prefix costs as many edits as the other prefix is long.
        if (m == 0)
            return n;
        if (n == 0)
            return m;

        int diagonal = LevenshteinRecursive(str1, str2, m - 1, n - 1);

        // Matching last characters add no cost.
        if (str1[m - 1] == str2[n - 1])
            return diagonal;

        int insertion = LevenshteinRecursive(str1, str2, m, n - 1);
        int deletion = LevenshteinRecursive(str1, str2, m - 1, n);
        int cheapest = Math.Min(Math.Min(insertion, deletion), diagonal);
        return cheapest + 1;
    }

    // Demo driver: prints the distance between "kitten" and "sitting".
    static void Main()
    {
        string source = "kitten";
        string target = "sitting";
        int result = LevenshteinRecursive(source, target, source.Length, target.Length);
        Console.WriteLine("Levenshtein Distance: " + result);
    }
}
|
Javascript
/**
 * Recursively computes the Levenshtein distance between the first m
 * characters of str1 and the first n characters of str2.
 */
function levenshteinRecursive(str1, str2, m, n) {
    // An empty prefix costs as many edits as the other prefix is long.
    if (m === 0) return n;
    if (n === 0) return m;

    const diagonal = levenshteinRecursive(str1, str2, m - 1, n - 1);

    // Matching last characters add no cost.
    if (str1[m - 1] === str2[n - 1]) return diagonal;

    const insertion = levenshteinRecursive(str1, str2, m, n - 1);
    const deletion = levenshteinRecursive(str1, str2, m - 1, n);
    return Math.min(insertion, deletion, diagonal) + 1;
}
// Demo: distance between "kitten" and "sitting", starting from the full lengths.
const str1 = "kitten",
    str2 = "sitting";
const distance = levenshteinRecursive(str1, str2, str1.length, str2.length);
console.log("Levenshtein Distance: " + distance);
|
Output
Levenshtein Distance: 3
Time complexity: O(3^(m+n))
Auxiliary Space: O(m+n) (recursion stack)
2) Levenshtein distance using Iterative with the full matrix approach
The iterative technique with a full matrix uses a 2D matrix to hold the intermediate results of the Levenshtein distance calculation. It begins with empty strings and iteratively fills the matrix row by row. It computes the minimum cost of insertions, deletions, and replacements based on the characters of both strings.
Below is the implementation for the above idea:
C++
#include <bits/stdc++.h>
using namespace std;
// Returns the Levenshtein distance between str1 and str2 using a full
// (len(str1)+1) x (len(str2)+1) dynamic-programming table, where
// cost[r][c] is the distance between the first r characters of str1 and
// the first c characters of str2.
int levenshteinFullMatrix(const string& str1, const string& str2)
{
    const int rows = str1.length();
    const int cols = str2.length();
    vector<vector<int>> cost(rows + 1, vector<int>(cols + 1, 0));

    for (int r = 0; r <= rows; ++r) {
        for (int c = 0; c <= cols; ++c) {
            if (r == 0) {
                // Empty prefix of str1: c insertions.
                cost[r][c] = c;
            }
            else if (c == 0) {
                // Empty prefix of str2: r deletions.
                cost[r][c] = r;
            }
            else if (str1[r - 1] == str2[c - 1]) {
                // Matching characters carry the diagonal value forward.
                cost[r][c] = cost[r - 1][c - 1];
            }
            else {
                // Cheapest of insertion, deletion, substitution, plus one.
                cost[r][c] = 1 + min({ cost[r][c - 1], cost[r - 1][c],
                                       cost[r - 1][c - 1] });
            }
        }
    }
    return cost[rows][cols];
}
// Demo driver: prints the distance between "kitten" and "sitting".
int main()
{
    const string source = "kitten";
    const string target = "sitting";
    cout << "Levenshtein Distance: "
         << levenshteinFullMatrix(source, target) << endl;
    return 0;
}
|
Java
import java.util.Arrays;
public class LevenshteinDistance {
    /**
     * Computes the Levenshtein distance between two strings with a full
     * dynamic-programming table, where {@code cost[r][c]} is the distance
     * between the first r characters of str1 and the first c characters
     * of str2.
     *
     * @param str1 first string
     * @param str2 second string
     * @return the minimum number of insertions, deletions and substitutions
     */
    public static int levenshteinFullMatrix(String str1, String str2) {
        final int rows = str1.length();
        final int cols = str2.length();
        final int[][] cost = new int[rows + 1][cols + 1];

        for (int r = 0; r <= rows; r++) {
            for (int c = 0; c <= cols; c++) {
                if (r == 0) {
                    // Empty prefix of str1: c insertions.
                    cost[r][c] = c;
                } else if (c == 0) {
                    // Empty prefix of str2: r deletions.
                    cost[r][c] = r;
                } else if (str1.charAt(r - 1) == str2.charAt(c - 1)) {
                    // Matching characters carry the diagonal value forward.
                    cost[r][c] = cost[r - 1][c - 1];
                } else {
                    int cheapest = Math.min(cost[r][c - 1],
                            Math.min(cost[r - 1][c], cost[r - 1][c - 1]));
                    cost[r][c] = cheapest + 1;
                }
            }
        }
        return cost[rows][cols];
    }

    /** Demo driver: prints the distance between "kitten" and "sitting". */
    public static void main(String[] args) {
        String source = "kitten";
        String target = "sitting";
        System.out.println("Levenshtein Distance: "
                + levenshteinFullMatrix(source, target));
    }
}
|
Python3
def levenshteinFullMatrix(str1, str2):
    """Return the Levenshtein distance between str1 and str2.

    Uses a full (len(str1)+1) x (len(str2)+1) table where dp[i][j] is the
    distance between str1[:i] and str2[:j].
    """
    m = len(str1)
    n = len(str2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    # First column / first row: distance from or to the empty prefix.
    for i in range(m + 1):
        dp[i][0] = i
    for j in range(n + 1):
        dp[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if str1[i - 1] == str2[j - 1]:
                # Matching characters carry the diagonal value forward.
                dp[i][j] = dp[i - 1][j - 1]
            else:
                # Cheapest of insertion, deletion, substitution, plus one.
                dp[i][j] = 1 + min(dp[i][j - 1], dp[i - 1][j], dp[i - 1][j - 1])
    return dp[m][n]
# Demo: distance between "kitten" and "sitting".
str1 = "kitten"
str2 = "sitting"
distance = levenshteinFullMatrix(str1, str2)
# The f prefix must be directly attached to the string literal.
print(f"Levenshtein Distance: {distance}")
|
C#
using System;
class LevenshteinDistance
{
    // Computes the Levenshtein distance between two strings with a full
    // dynamic-programming table, where cost[r, c] is the distance between
    // the first r characters of str1 and the first c characters of str2.
    static int LevenshteinFullMatrix(string str1, string str2)
    {
        int rows = str1.Length;
        int cols = str2.Length;
        int[,] cost = new int[rows + 1, cols + 1];

        for (int r = 0; r <= rows; r++)
        {
            for (int c = 0; c <= cols; c++)
            {
                if (r == 0)
                {
                    // Empty prefix of str1: c insertions.
                    cost[r, c] = c;
                }
                else if (c == 0)
                {
                    // Empty prefix of str2: r deletions.
                    cost[r, c] = r;
                }
                else if (str1[r - 1] == str2[c - 1])
                {
                    // Matching characters carry the diagonal value forward.
                    cost[r, c] = cost[r - 1, c - 1];
                }
                else
                {
                    int cheapest = Math.Min(cost[r, c - 1],
                        Math.Min(cost[r - 1, c], cost[r - 1, c - 1]));
                    cost[r, c] = cheapest + 1;
                }
            }
        }
        return cost[rows, cols];
    }

    // Demo driver: prints the distance between "kitten" and "sitting".
    static void Main()
    {
        string source = "kitten";
        string target = "sitting";
        Console.WriteLine("Levenshtein Distance: " + LevenshteinFullMatrix(source, target));
    }
}
|
Javascript
/**
 * Computes the Levenshtein distance between two strings with a full
 * dynamic-programming table, where table[r][c] is the distance between
 * the first r characters of str1 and the first c characters of str2.
 */
function levenshteinFullMatrix(str1, str2) {
    const rowCount = str1.length + 1;
    const colCount = str2.length + 1;

    // Build the table row by row; column 0 holds the deletion-only costs.
    const table = [];
    for (let r = 0; r < rowCount; r++) {
        const row = new Array(colCount).fill(0);
        row[0] = r;
        table.push(row);
    }
    // Row 0 holds the insertion-only costs.
    for (let c = 0; c < colCount; c++) {
        table[0][c] = c;
    }

    for (let r = 1; r < rowCount; r++) {
        for (let c = 1; c < colCount; c++) {
            table[r][c] = str1[r - 1] === str2[c - 1]
                ? table[r - 1][c - 1]
                : 1 + Math.min(table[r][c - 1], table[r - 1][c], table[r - 1][c - 1]);
        }
    }
    return table[rowCount - 1][colCount - 1];
}
// Demo: distance between "kitten" and "sitting".
const str1 = "kitten",
    str2 = "sitting";
const distance = levenshteinFullMatrix(str1, str2);
console.log("Levenshtein Distance:", distance);
|
Output
Levenshtein Distance: 3
Time complexity: O(m*n)
Auxiliary Space: O(m*n)
3) Levenshtein distance using Iterative with two matrix rows approach
By simply storing two rows of the matrix at a time, the iterative technique with two matrix rows reduces space complexity. It iterates through the strings row by row, storing the current and past calculations in two rows.
Below is the implementation for the above approach:
C++
#include <bits/stdc++.h>
using namespace std;
// Returns the Levenshtein distance between str1 and str2 while keeping only
// two rows of the dynamic-programming table in memory.
//
// prevRow holds the finished row for the first i-1 characters of str1 and
// currRow is filled in for the first i characters; the two buffers are
// swapped after every row instead of being copied.
int levenshteinTwoMatrixRows(const string& str1, const string& str2)
{
    const int m = static_cast<int>(str1.length());
    const int n = static_cast<int>(str2.length());
    vector<int> prevRow(n + 1, 0);
    vector<int> currRow(n + 1, 0);

    // Row 0: turning the empty string into str2[0..j) takes j insertions.
    for (int j = 0; j <= n; j++) {
        prevRow[j] = j;
    }

    for (int i = 1; i <= m; i++) {
        // Column 0: turning str1[0..i) into the empty string takes i deletions.
        currRow[0] = i;
        for (int j = 1; j <= n; j++) {
            if (str1[i - 1] == str2[j - 1]) {
                currRow[j] = prevRow[j - 1];
            }
            else {
                currRow[j] = 1 + min({ currRow[j - 1], prevRow[j],
                                       prevRow[j - 1] });
            }
        }
        // O(1) buffer exchange; currRow is fully overwritten next iteration.
        prevRow.swap(currRow);
    }

    // After the swap the most recent row lives in prevRow. When str1 is
    // empty the loop never runs and prevRow still holds row 0, so the
    // result is n rather than the zero-initialised currRow[n].
    return prevRow[n];
}
// Demo driver: prints the distance between "kitten" and "sitting"
// (no trailing newline is written).
int main()
{
    const string source = "kitten";
    const string target = "sitting";
    cout << "Levenshtein Distance: "
         << levenshteinTwoMatrixRows(source, target);
    return 0;
}
|
Java
import java.util.Arrays;
public class LevenshteinDistance {
    /**
     * Computes the Levenshtein distance between two strings while keeping
     * only two rows of the dynamic-programming table in memory.
     *
     * @param str1 first string
     * @param str2 second string
     * @return the minimum number of insertions, deletions and substitutions
     */
    public static int levenshteinTwoMatrixRows(String str1, String str2) {
        int m = str1.length();
        int n = str2.length();
        int[] prevRow = new int[n + 1];
        int[] currRow = new int[n + 1];

        // Row 0: turning the empty string into the first j characters of
        // str2 takes j insertions.
        for (int j = 0; j <= n; j++) {
            prevRow[j] = j;
        }

        for (int i = 1; i <= m; i++) {
            // Column 0: i deletions.
            currRow[0] = i;
            for (int j = 1; j <= n; j++) {
                if (str1.charAt(i - 1) == str2.charAt(j - 1)) {
                    currRow[j] = prevRow[j - 1];
                } else {
                    currRow[j] = 1 + Math.min(currRow[j - 1],
                            Math.min(prevRow[j], prevRow[j - 1]));
                }
            }
            // Exchange the two buffers instead of allocating a copy per row;
            // currRow is fully overwritten on the next iteration.
            int[] finished = currRow;
            currRow = prevRow;
            prevRow = finished;
        }

        // The most recent row is in prevRow. When str1 is empty the loop
        // never runs and prevRow still holds row 0, so the result is n
        // rather than the zero-initialised currRow[n].
        return prevRow[n];
    }

    /** Demo driver: prints the distance between "kitten" and "sitting". */
    public static void main(String[] args) {
        String str1 = "kitten";
        String str2 = "sitting";
        int distance = levenshteinTwoMatrixRows(str1, str2);
        System.out.println("Levenshtein Distance: " + distance);
    }
}
|
Python3
def levenshtein_two_matrix_rows(str1, str2):
    """Return the Levenshtein distance between str1 and str2.

    Only two rows of the dynamic-programming table are kept: prev_row for
    str1[:i-1] and curr_row for str1[:i].
    """
    m = len(str1)
    n = len(str2)
    # Row 0: turning "" into str2[:j] takes j insertions.
    prev_row = list(range(n + 1))
    curr_row = [0] * (n + 1)
    for i in range(1, m + 1):
        # Column 0: i deletions.
        curr_row[0] = i
        for j in range(1, n + 1):
            if str1[i - 1] == str2[j - 1]:
                curr_row[j] = prev_row[j - 1]
            else:
                curr_row[j] = 1 + min(
                    curr_row[j - 1],
                    prev_row[j],
                    prev_row[j - 1],
                )
        # Exchange the buffers instead of copying; curr_row is fully
        # overwritten on the next iteration.
        prev_row, curr_row = curr_row, prev_row
    # The most recent row is in prev_row. When str1 is empty the loop never
    # runs and prev_row still holds row 0, so the result is n rather than
    # the zero-initialised curr_row[n].
    return prev_row[n]
# Demo: runs only when the file is executed as a script.
if __name__ == "__main__":
    str1 = "kitten"
    str2 = "sitting"
    distance = levenshtein_two_matrix_rows(str1, str2)
    print("Levenshtein Distance:", distance)
|
C#
using System;
class LevenshteinDistance {
    // Computes the Levenshtein distance between two strings while keeping
    // only two rows of the dynamic-programming table in memory.
    static int LevenshteinTwoMatrixRows(string str1,
                                        string str2)
    {
        int m = str1.Length;
        int n = str2.Length;
        int[] prevRow = new int[n + 1];
        int[] currRow = new int[n + 1];

        // Row 0: turning the empty string into the first j characters of
        // str2 takes j insertions.
        for (int j = 0; j <= n; j++) {
            prevRow[j] = j;
        }

        for (int i = 1; i <= m; i++) {
            // Column 0: i deletions.
            currRow[0] = i;
            for (int j = 1; j <= n; j++) {
                if (str1[i - 1] == str2[j - 1]) {
                    currRow[j] = prevRow[j - 1];
                }
                else {
                    currRow[j] = 1
                                 + Math.Min(
                                     currRow[j - 1],
                                     Math.Min(prevRow[j],
                                              prevRow[j - 1]));
                }
            }
            // Exchange the two buffers instead of copying n + 1 elements
            // per row; currRow is fully overwritten on the next iteration.
            int[] finished = currRow;
            currRow = prevRow;
            prevRow = finished;
        }

        // The most recent row is in prevRow. When str1 is empty the loop
        // never runs and prevRow still holds row 0, so the result is n
        // rather than the zero-initialised currRow[n].
        return prevRow[n];
    }

    // Demo driver: prints the distance between "kitten" and "sitting".
    static void Main()
    {
        string str1 = "kitten";
        string str2 = "sitting";
        int distance = LevenshteinTwoMatrixRows(str1, str2);
        Console.WriteLine("Levenshtein Distance: "
                          + distance);
    }
}
|
Javascript
/**
 * Computes the Levenshtein distance between two strings while keeping only
 * two rows of the dynamic-programming table in memory.
 */
function levenshteinTwoMatrixRows(str1, str2) {
    const m = str1.length;
    const n = str2.length;
    let prevRow = new Array(n + 1).fill(0);
    let currRow = new Array(n + 1).fill(0);

    // Row 0: turning "" into the first j characters of str2 takes j insertions.
    for (let j = 0; j <= n; j++) {
        prevRow[j] = j;
    }

    for (let i = 1; i <= m; i++) {
        // Column 0: i deletions.
        currRow[0] = i;
        for (let j = 1; j <= n; j++) {
            if (str1[i - 1] === str2[j - 1]) {
                currRow[j] = prevRow[j - 1];
            } else {
                currRow[j] = 1 + Math.min(
                    currRow[j - 1],
                    prevRow[j],
                    prevRow[j - 1]
                );
            }
        }
        // Exchange the two buffers instead of allocating a copy per row;
        // currRow is fully overwritten on the next iteration.
        [prevRow, currRow] = [currRow, prevRow];
    }

    // The most recent row is in prevRow. When str1 is empty the loop never
    // runs and prevRow still holds row 0, so the result is n rather than
    // the zero-filled currRow[n].
    return prevRow[n];
}
// Demo: distance between "kitten" and "sitting".
const str1 = "kitten",
    str2 = "sitting";
const distance = levenshteinTwoMatrixRows(str1, str2);
console.log("Levenshtein Distance:", distance);
|
Output
Levenshtein Distance: 3
Time complexity: O(m*n)
Auxiliary Space: O(n)
Share your thoughts in the comments
Please Login to comment...