java实现字符串匹配问题之求两个字符串的最⼤公共⼦串
近期在项⽬⼯作中有⼀个关于⽂本对照的需求,经过这段时间的学习,总结了这篇博客内容:求两个字符串的最⼤公共⼦串。
算法思想:基于图计算两字符串的公共⼦串。详细算法思想參照下图:
输⼊字符串S1:achmacmh    输⼊字符串S2:macham
1)第a步,是将字符串s1,s2分别按字节拆分,构成⼀个⼆维数组;
2)⼆维数组中的值如b所看到的,⽐⽅第⼀⾏第⼀列的值表⽰字符串s2和s1的第⼀个字节是否相等,若相等就是1,否则就是0,终于产⽣b 所看到的的⼆维数组;
3)分别求⼆维数组中斜线上的公共因⼦(斜线为元素a右下⾓值,即a[i][j]的下⼀个元素是a[i+1][j+1];公共因⼦为1所在的位置构成的字符串);
4)对全部公共因⼦排序,返回最⼤的公共因⼦的值。
详细的实现代码例如以下所看到的:
package cn.luleipare;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
public class StringCompare {
private int a;
private int b;
public String getMaxLengthCommonString(String s1, String s2) {
if (s1 == null || s2 == null)  {
return null;
}
a = s1.length();//s1长度做⾏
b = s2.length();//s2长度做列
if(a== 0 || b == 0) {
return "";
}
//设置匹配矩阵
boolean [][] array = new boolean[a][b];
for (int i = 0; i  < a; i++) {
char c1 = s1.charAt(i);
for (int j = 0; j < b; j++) {
char c2 = s2.charAt(j);
if (c1 == c2) {
array[i][j] = true;
}  else {
array[i][j] = false;
}
}
}
//求全部公因⼦字符串,保存信息为相对第⼆个字符串的起始位置和长度
List<ChildString> childStrings = new ArrayList<ChildString>();
for (int i = 0; i < a; i++) {
getMaxSort(i, 0, array, childStrings);
}
for (int i = 1; i < b; i++) {
getMaxSort(0, i, array, childStrings);
}
//排序
sort(childStrings);
if (childStrings.size() < 1) {
return "";
}
//返回最⼤公因⼦字符串
int max = (0).maxLength;
StringBuffer sb = new StringBuffer();
for (ChildString s: childStrings) {
if (max != s.maxLength) {
break;
}
sb.append(s2.substring(s.maxStart, s.maxStart + s.maxLength));
sb.append("\n");
}
String();
}
/
/排序,倒叙
private void sort(List<ChildString> list) {
Collections.sort(list, new Comparator<ChildString>(){
public int compare(ChildString o1, ChildString o2) {
return o2.maxLength - o1.maxLength;
}
});
}
//求⼀条斜线上的公因⼦字符串
private void getMaxSort(int i, int j, boolean [][] array, List<ChildString> sortBean) {
int length = 0;
int start = j;
for (; i < a && j < b; i++,j++) {
if (array[i][j]) {
length++;
} else {
sortBean.add(new ChildString(length, start));
length = 0;
start = j + 1;
}
if (i == a-1 || j == b-1) {
sortBean.add(new ChildString(length, start));
}
}
}
//公因⼦类
class ChildString {
int maxLength;
int maxStart;
ChildString(int maxLength, int maxStart){
this.maxLength = maxLength;
this.maxStart = maxStart;
}
}
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
System.out.println(new StringCompare().getMaxLengthCommonString("achmacmh", "macham"));
}
}
程序终于运⾏结果是:
对于两个⽂件的⽐对个⼈觉得能够參照这样的算法思想(⾃⼰如今并为实现),在⽇后的博客中将会写到。
上述实现过程中,⽤数组保存了全部的公共⼦串信息,然后排序取最⼤的⼦串,这样的做法假设仅仅是求最⼤⼦串的话,算法就不是⾮常合理,因此做了例如以下改动,List仅仅保存当前计算中最⼤的⼦串,详细实现例如以下:
/**
*@Description: 字符串⽐較
*/
package st;
import java.util.ArrayList;
import java.util.List;
public class StringCompare {
private int a;
private int b;
private int maxLength = -1;
public String getMaxLengthCommonString(String s1, String s2) {
if (s1 == null || s2 == null)  {
return null;
}
a = s1.length();//s1长度做⾏
b = s2.length();//s2长度做列
if(a== 0 || b == 0) {
return "";
}
//设置匹配矩阵
boolean [][] array = new boolean[a][b];
for (int i = 0; i  < a; i++) {
char c1 = s1.charAt(i);
for (int j = 0; j < b; j++) {
char c2 = s2.charAt(j);
if (c1 == c2) {
array[i][j] = true;
字符串长度最大是多少}  else {
array[i][j] = false;
}
}
}
//求全部公因⼦字符串,保存信息为相对第⼆个字符串的起始位置和长度
List<ChildString> childStrings = new ArrayList<ChildString>();
for (int i = 0; i < a; i++) {
getMaxSort(i, 0, array, childStrings);
}
for (int i = 1; i < b; i++) {
getMaxSort(0, i, array, childStrings);
}
StringBuffer sb = new StringBuffer();
for (ChildString s: childStrings) {
sb.append(s2.substring(s.maxStart, s.maxStart + s.maxLength));
sb.append("\n");
}
String();
}
//求⼀条斜线上的公因⼦字符串
private void getMaxSort(int i, int j, boolean [][] array, List<ChildString> sortBean) {
int length = 0;
int start = j;
for (; i < a && j < b; i++,j++) {
if (array[i][j]) {
length++;
} else {
//直接add,保存全部⼦串,以下的推断,仅仅保存当前最⼤的⼦串
//sortBean.add(new ChildString(length, start));
if (length == maxLength) {
sortBean.add(new ChildString(length, start));
} else if (length > maxLength) {
sortBean.clear();
maxLength = length;
sortBean.add(new ChildString(length, start));
}
length = 0;
start = j + 1;
}
if (i == a-1 || j == b-1) {
//直接add,保存全部⼦串,以下的推断,仅仅保存当前最⼤的⼦串
//sortBean.add(new ChildString(length, start));
if (length == maxLength) {
sortBean.add(new ChildString(length, start));
} else if (length > maxLength) {
sortBean.clear();
maxLength = length;
sortBean.add(new ChildString(length, start));
}
}
}
}
//公因⼦类
class ChildString {
int maxLength;
int maxStart;
ChildString(int maxLength, int maxStart){
this.maxLength = maxLength;
this.maxStart = maxStart;
}
}
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
System.out.println(new StringCompare().getMaxLengthCommonString("abcdef", "defabc"));
} }

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。