SQLServer对⽐两字符串的相似度(函数算法)
⼀、概述
最近有⼈问到关于两个字符串求相似度的函数,所以就写了本篇⽂章,分别是“简单的模糊匹配”,“顺序匹配”,“⼀对⼀位置匹配”。在平时的这种函数可能会需要⽤到,业务需求不⼀样,这⾥只给出参照,实际情况可以相应修改。本⽂所有的两个字段⽐较都是除以⽐较字段本⾝,例如A与B⽐较,出的长度除以A的长度,因为考虑如果A的长度⼤于B的长度,相似度会超100%,例如‘abbc’,'ab'.
如果⼤家想除以B的长度,只需要在语句末尾将‘SET @num=@num*1.0/LEN(@Cloumna)’修改成‘SET @num=@num*1.0/LEN(@Cloumnb)’
1.两个字符串简单相似
---两个字段简单相似
CREATE FUNCTION DBO.FN_Resemble
(@Cloumna NVARCHAR(MAX),
@Cloumnb NVARCHAR(MAX)
)
RETURNS FLOAT
AS
BEGIN
DECLARE@num FLOAT,@len int
SET@Cloumna=ISNULL(@Cloumna,0)
SET@Cloumnb=ISNULL(@Cloumnb,0)
SET@len=1
SET@num=0
WHILE(LEN(@Cloumna)<>0AND LEN(@CloumnB)<>0)
BEGIN
WHILE(@len<=LEN(@Cloumna))
BEGIN
DECLARE@a NVARCHAR(4)
SET@a=''
SET@a=SUBSTRING(@Cloumna,@len,1)
IF(CHARINDEX(@a,@CloumnB)>0)
BEGIN
SET@num=@num+1
END
SET@len=@len+1
END
SET@num=@num*1.0/LEN(@Cloumna)
BREAK
END
RETURN@num
END
----测试代码
SELECT DBO.FN_Resemble('ABDC321G','ABDC123G')
2.两个字符串顺序相似
---两个字段顺序相似
CREATE FUNCTION DBO.FN_Resemble_order
(@Cloumna NVARCHAR(MAX),
@Cloumnb NVARCHAR(MAX)
)
RETURNS FLOAT
AS
BEGIN
DECLARE@num FLOAT,@len int
SET@Cloumna=ISNULL(@Cloumna,0)
SET@Cloumnb=ISNULL(@Cloumnb,0)
SET@len=1
SET@num=0
WHILE(LEN(@Cloumna)<>0AND LEN(@CloumnB)<>0)
BEGIN
DECLARE@a NVARCHAR(4)
DECLARE@b NVARCHAR(4)
IF(LEN(@Cloumna)>=LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@CloumnB))
BEGIN
SET@a=''
SET@a=SUBSTRING(@Cloumna,@len,1)
SET@b=''
SET@b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET@num=@num+1
END
ELSE
BEGIN
break
END
SET@len=@len+1
END
END
ELSE IF (LEN(@Cloumna)<LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@Cloumna))
BEGIN
SET@a=''
SET@a=SUBSTRING(@Cloumna,@len,1)
SET@b=''
SET@b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET@num=@num+1
END
ELSE
BEGIN
break
END
SET@len=@len+1
END
END
SET@num=@num*1.0/LEN(@Cloumna)
BREAK
END
RETURN@num
END
go
----测试代码
SELECT DBO.FN_Resemble_order('ABDC456G','ABDC123G') 3.两个字符串⼀对⼀相似
---两个字段⼀对⼀相似
CREATE FUNCTION DBO.FN_Resemble_onebyone
(@Cloumna NVARCHAR(MAX),
@Cloumnb NVARCHAR(MAX)
)
RETURNS FLOAT
AS
BEGIN
DECLARE@num FLOAT,@len int
SET@Cloumna=ISNULL(@Cloumna,0)
SET@Cloumnb=ISNULL(@Cloumnb,0)
字段字符串去重复SET@len=1
SET@num=0
WHILE(LEN(@Cloumna)<>0AND LEN(@CloumnB)<>0) BEGIN
DECLARE@a NVARCHAR(4)
DECLARE@b NVARCHAR(4)
IF(LEN(@Cloumna)>=LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@CloumnB))
BEGIN
SET@a=''
SET@a=SUBSTRING(@Cloumna,@len,1)
SET@b=''
SET@b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET@num=@num+1
END
SET@len=@len+1
END
END
ELSE IF (LEN(@Cloumna)<LEN(@CloumnB))
BEGIN
WHILE(@len<=LEN(@Cloumna))
BEGIN
SET@a=''
SET@a=SUBSTRING(@Cloumna,@len,1)
SET@b=''
SET@b=SUBSTRING(@CloumnB,@len,1)
IF(@a=@b)
BEGIN
SET@num=@num+1
END
SET@len=@len+1
END
END
SET@num=@num*1.0/LEN(@Cloumna)
BREAK
END
RETURN@num
END
----测试代码
SELECT DBO.FN_Resemble_onebyone('ABDC456G','ABDC123G')
4.对⽐两个版本号的⼤⼩
如果前⾯⽐后⾯的⼤返回1,⼩返回-1,相等返回0
ALTER FUNCTION FNStrCompare
(@Val1VARCHAR(50),---⽐较字符串1
@Val2VARCHAR(50),---⽐较字符串2
@Break VARCHAR(10) ---分隔符
)
RETURNS INT
AS
BEGIN
DECLARE@Num1INT
DECLARE@Num2INT
DECLARE@Val1Num INT
DECLARE@Val2Num INT
DECLARE@a INT
IF CHARINDEX(@Break,@Val1)>0AND CHARINDEX(@Break,@Val2)>0 BEGIN
WHILE LEN(@Val1)>0AND LEN(@Val2)>0
BEGIN
IF CHARINDEX(@Break,@Val1)>0AND CHARINDEX(@Break,@Val2)>0
BEGIN
SET@Num1=CHARINDEX(@Break,@Val1)-1
SET@Val1Num=LEFT(@Val1,@Num1)
SET@Val1=SUBSTRING(@Val1,@Num1+2,LEN(@Val1))
SET@Num2=CHARINDEX(@Break,@Val2)-1
SET@Val2Num=LEFT(@Val2,@Num2)
SET@Val2=SUBSTRING(@Val2,@Num1+2,LEN(@Val2)) END
ELSE
BEGIN
SET@Val1Num=CONVERT(INT,@Val1)
SET@Val2Num=CONVERT(INT,@Val2)
IF@Val1Num=@Val2Num
BEGIN
SET@a=0
BREAK
END
END
IF@Val1Num>@Val2Num
BEGIN
SET@a=1
BREAK
END
IF@Val1Num<@Val2Num
BEGIN
SET@a=-1
BREAK
END
END
END
ELSE
BEGIN
SET@Val1Num=CONVERT(INT,@Val1)
SET@Val2Num=CONVERT(INT,@Val2)
IF@Val1Num>@Val2Num
BEGIN
SET@a=1
END
IF@Val1Num<@Val2Num
BEGIN
SET@a=-1
END
IF@Val1Num=@Val2Num
BEGIN
SET@a=0
END
END
RETURN@a
END
执⾏
SELECT chenmh.dbo.FNStrCompare('1.15.1','1.15.1','.') SELECT chenmh.dbo.FNStrCompare('1.15.2','1.15.1','.') SELECT chenmh.dbo.FNStrCompare('1.15.2','2.3.1','.') SELECT chenmh.dbo.FNStrCompare('1.08.2','1.15.1','.') SELECT dbo.FNStrCompare('1','2','.')
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论