Thursday, May 24, 2018

Levenshtein Distance Algorithm (R code)

Levenshtein Distance represents the minimum number of letter changes required to convert a word to another word.

The below R language code gives the Levenshtein Distance (letter changes required) to convert "Saturday" to "Sunday".



str1 <- c("Saturday")
str2 <- c("Sunday")
str1knt <- nchar(str1)
str2knt <- nchar(str2)
str11<-substring(str1,1:str1knt,1:str1knt)
m2<-matrix(c(" ","",str11),nrow=1,ncol=str1knt+2,byrow=FALSE)
sq<-seq(0,str1knt)
m2<-rbind(m2,c(" ",sq))

i=1
k=str2knt
while(k>0){ 
m2=rbind(m2,c(substring(str2,i,i),i,replicate(str1knt,0)))
k=k-1
i=i+1
}

k=3
while(k<=str2knt+2){
i=3
                while(i<=str1knt+2){
                apex<-as.numeric(m2[k-1,i-1])
                horiz<-as.numeric(m2[k-1,i])
                verti<-as.numeric(m2[k,i-1])
                s2char<-m2[k,1]
                s1char<-m2[1,i]

                if(s2char==s1char){ m2[k,i] <- min(apex,horiz,verti)} else { m2[k,i] <- min(apex,horiz,verti)+1}
                i=i+1
                }
k=k+1
}

LevenshteinDistance <- as.numeric(m2[k-1,i-1])
LevenshteinDistance

Levenshtein Distance Algorithm (TSQL)


Levenshtein Distance represents the minimum number of letter changes required to convert a word to another word.

The below TSQL code gives the Levenshtein Distance (letter changes required) to convert "Saturday" to "Sunday".


SET NOCOUNT ON
DECLARE @a VARCHAR(100) = 'sunday', @b VARCHAR(100) = 'saturday'
DECLARE @lddistance VARCHAR(10)
DECLARE @lena INT = (SELECT LEN(@a)), @lenb INT = (SELECT LEN(@b)), @lena2 INT, @lenb2 INT
SELECT @lena2 = @lena, @lenb2 = @lenb
DECLARE @qry NVARCHAR(1000), @lblcntr INT = 3
DECLARE @colcount INT = 0, @colcount2 INT = 0, @rowcount INT = 0,@rowcount2 INT = 0

CREATE TABLE #LDAlg (lbl0 VARCHAR(10),lbl1 VARCHAR(10),[0] INT)
INSERT #LDAlg (lbl0,lbl1,[0]) SELECT 'R1',NULL,NULL UNION ALL SELECT 'R2',NULL,NULL

WHILE(@lenb >0)
BEGIN
SET @qry = 'ALTER TABLE #LDAlg ADD ['+ CAST(@lenb2+1-@lenb AS NVARCHAR)+'] VARCHAR(10)'
EXEC SP_EXECUTESQL @qry
SET @colcount = @colcount+1
SET @qry = ''
SET @qry = 'UPDATE #LDAlg SET ['+ CAST(@lenb2+1-@lenb AS NVARCHAR) + '] = (SELECT SUBSTRING('''+@b+''', '+CAST(@lenb2+1-@lenb AS NVARCHAR) +',1)) WHERE lbl0 = ''R1'''
EXEC SP_EXECUTESQL @qry
SET @qry = ''
SET @qry = 'UPDATE #LDAlg SET ['+ CAST(@lenb2+1-@lenb AS NVARCHAR) + '] = (SELECT ''' +CAST(@lenb2+1-@lenb AS NVARCHAR) +''') WHERE lbl0 = ''R2'''
EXEC SP_EXECUTESQL @qry
SET @lenb = @lenb - 1
END
--SET @lenb = @lenb2

WHILE(@lena > 0)
BEGIN
SET @qry = ''
SET @qry = 'INSERT #LDAlg (lbl1, [0]) SELECT SUBSTRING('''+@a+''', '+CAST(@lena2+1-@lena AS NVARCHAR) +',1),'+CAST(@lena2+1-@lena AS NVARCHAR)
EXEC SP_EXECUTESQL @qry
SET @lena = @lena-1
SET @lblcntr = @lblcntr +1
END

UPDATE #LDAlg SET [0] = 0 WHERE lbl0 = 'R2'

ALTER TABLE #LDAlg DROP COLUMN lbl0

SELECT @rowcount = (SELECT COUNT(*) FROM #LDAlg WHERE [0] > 0)
SET @rowcount2 = 1

CREATE TABLE #tbl(BValue NVARCHAR(10),ApexValue INT, HorizValue INT, VertValue INT)

DECLARE @apex VARCHAR(10) = 0, @horiz VARCHAR(10) = 1, @verti VARCHAR(10) =1, @aval VARCHAR(10), @bval VARCHAR(10)
DECLARE @valuecomp VARCHAR(10) = 0
--DECLARE @horizcolname INT = 1
WHILE (@rowcount2 <= @rowcount)
BEGIN
--PRINT 'Row'+CAST(@rowcount2 AS VARCHAR)
SET @colcount2 = 1
WHILE (@colcount2 <= @colcount)
BEGIN
--PRINT 'Column '+ CAST(@colcount2 AS VARCHAR)
SET @aval = (SELECT lbl1 FROM #LDAlg WHERE [0] = @rowcount2)

SET @qry = ''
SET @qry = 'SELECT ['+CAST(@colcount2 AS VARCHAR)+'] FROM #LDAlg WHERE [0] IS NULL'
INSERT INTO #tbl (BValue)
EXEC SP_EXECUTESQL @qry

SET @qry = ''
SET @qry = 'UPDATE #tbl SET ApexValue = (SELECT ['+CAST(@colcount2-1 AS VARCHAR)+'] FROM #LDAlg WHERE [0] = '+CAST(@rowcount2-1 AS VARCHAR)+')'
EXEC SP_EXECUTESQL @qry

SET @qry = ''
SET @qry = 'UPDATE #tbl SET HorizValue = (SELECT ['+CAST(@colcount2 AS VARCHAR)+'] FROM #LDAlg WHERE [0] = '+CAST(@rowcount2-1 AS VARCHAR)+')'
EXEC SP_EXECUTESQL @qry


SET @qry = ''
SET @qry = 'UPDATE #tbl SET VertValue = (SELECT ['+CAST(@colcount2-1 AS VARCHAR)+'] FROM #LDAlg WHERE [0] = '+CAST(@rowcount2 AS VARCHAR)+')'
EXEC SP_EXECUTESQL @qry

SELECT @bval = BValue, @apex = ApexValue, @horiz = HorizValue, @verti = VertValue FROM #tbl
DELETE FROM #tbl

SET @valuecomp = (SELECT MIN(Val) FROM ( SELECT @apex Val UNION ALL SELECT @horiz UNION ALL SELECT @verti) q )

SET @qry = ''
SET @qry = 'UPDATE #LDAlg SET ['+CAST(@colcount2 AS VARCHAR)+']= CASE WHEN '''+@aval+''' = '''+@bval+''' THEN '+@valuecomp+' ELSE '+CAST(@valuecomp + 1 AS VARCHAR)+' END WHERE [0] = '+CAST(@rowcount2 AS VARCHAR)
EXEC SP_EXECUTESQL @qry

SET @colcount2 = @colcount2 + 1
END
SET @rowcount2 = @rowcount2 + 1
END
--SELECT * FROM #LDAlg
SET @qry = ''
SET @qry = 'SELECT ['+CAST(@colcount AS VARCHAR)+'] AS [Levenshtein Distance] FROM #LDAlg WHERE [0] = '+CAST(@rowcount2 - 1 AS VARCHAR)
EXEC SP_EXECUTESQL @qry

DROP TABLE #LDAlg
DROP TABLE #tbl
GO