首页 > 试题广场 >

最短编辑距离

[编程题]最短编辑距离
  • 热度指数:2023 时间限制:C/C++ 1秒,其他语言2秒 空间限制:C/C++ 32M,其他语言64M
  • 算法知识视频讲解
UNIX系统下有一个行编辑器ed,它每次只对一行文本做删除一个字符、插入一个字符或替换一个字符三种操作。例如某一行的内容是“ABC”,经过把第二个字符替换成“D”、删除第一个字符、末尾插入一个字符“B”,这三步操作后,内容就变成了“DCB”。即“ABC”变成“DCB”需要经过3步操作,我们称它们的编辑距离为3。

现在给你两个任意字符串(不包含空格),请帮忙计算它们的最短编辑距离。

输入描述:
输入包含多组数据。

每组数据包含两个字符串m和n,它们仅包含字母,并且长度不超过1024。


输出描述:
对应每组输入,输出最短编辑距离。
示例1

输入

ABC CBCD
ABC DCB

输出

2
3
我靠,python3通过不了.............老是说超时。。。。。。。本地测平均时间为240毫秒左右...........求解决(题目显示最多不能超过1秒)
ef minDistance_dp(word1, word2):
    """
        word1: 原始字符串
        word2: 目标字符串
    """

    N1, N2 = len(word1), len(word2)

    if(N1 == 0) or (N2 == 0):
        if N1 > N2:
            return N1
        else:
            return N2

    if (N1 < N2):
        return minDistance_dp(word2, word1)

    k = N2 + 1
    res = [i  for i in range(k)]

    for i in range(N1):
        
        old = res[0]
        res[0] = i + 1
        
        for j in range(1, k):
            
            tmp = res[j]
            jk = j - 1

            if word1[i] == word2[jk]:
                res[j] = old
            else:
                if tmp < old:
                    if tmp < res[jk]:
                        res[j] = tmp + 1
                    else:
                        res[j] = res[jk] + 1
                else:
                    if old < res[jk]:
                        res[j] = old + 1
                    else:
                        res[j] = res[jk] + 1

            old = tmp

    return res[N2]

def genetare_word(m,n):
    """
        n 表示字符的长度
    """
    word1 = [chr(randint(65,90)) for _ in range(m)]
    word2 = [chr(randint(65, 90)) for _ in range(n)]

    return (word1,word2)

# while True:

#     try:
#         word1, word2 = raw_input().split()
#         print(minDistance_dp(word1, word2))

#     except:
#         break

if __name__ == '__main__':
    for _ in range(500):
        m = randint(1, 1024)
        n = randint(1, 1024)
        word1,word2 = genetare_word(1024,1024)
        result = minDistance_dp(word1,word2)
        if result == None:
            print("算法不正确")

    # profile.run("minDistance_dp(word1, word2)")

发表于 2018-10-14 13:51:07 回复(0)

python通不过这道题目,希望能解决一下。

还是把代码贴一下:

def minDistance(word1, word2):
    l1, l2 = len(word1) + 1, len(word2) + 1
    dp = [[0 for i in range(l2)] for j in range(l1)]
    for i in range(l1):
        dp[i][0] = i
    for j in range(l2):
        dp[0][j] = j
    for i in range(1, l1):
        for j in range(1, l2):
            dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + (word1[i - 1] != word2[j - 1]))
    return dp[-1][-1]


while True:
    try:
        word1,word2=input().split()
        print(minDistance(word1,word2))
    except:
        break
发表于 2017-11-15 22:10:31 回复(2)
#include<iostream>
#include<vector>
#include<string>
using namespace std;
int main(){
    string str1, str2;
    while(cin >> str1 >> str2){
        int n = str1.length();
        int m = str2.length();
        vector<vector<int> > dp(n+1);
        for(int i = 0; i <= n; ++i)
            dp[i].resize(m+1);
        dp[0][0] = 0;
        for(int i = 1; i <= n; ++i)
            dp[i][0] = i;
        for(int j = 1; j <= m; ++j)
            dp[0][j] = j;
        for(int i = 1; i <= n; ++i){
            for(int j = 1; j <= m; ++j){
                dp[i][j] = min(dp[i-1][j]+1, dp[i][j-1]+1);
                if(str1[i-1] == str2[j-1])
                    dp[i][j] = min(dp[i-1][j-1], dp[i][j]);
                else
                    dp[i][j] = min(dp[i][j], dp[i-1][j-1]+1); 
            }
        }
        cout << dp[n][m] << endl;
    }
    return 0;
}

发表于 2016-09-04 15:29:23 回复(0)
DHJ头像 DHJ
#include <iostream>
#include <string>
#include <vector>
using namespace std;

int minDistance(string word1, string word2) {
     int m = word1.size(), n = word2.size(); 
     if (word1 == "") return n;
     if (word2 == "") return m;

     vector<vector<int> > dp(m+1, vector<int>(n+1));
     for (int i = 0; i <= n; i++) dp[0][i] = i;
     for (int i = 0; i <= m; i++) dp[i][0] = i;

     for (int i = 1; i <= m; i++)
         for (int j = 1; j <= n; j++) {
         if (word1[i-1] == word2[j-1]) dp[i][j] = dp[i-1][j-1];
         else {
             dp[i][j] = min(dp[i-1][j], dp[i][j-1]); 
             dp[i][j] = min(dp[i][j], dp[i-1][j-1])+1;
         }
     }
     return dp[m][n];
 }

int main() {
    string s1, s2;
    while (cin >> s1 >> s2) {
        cout << minDistance(s1, s2) << endl;
    }
}

发表于 2016-09-04 14:46:47 回复(2)
import java.util.Scanner;
// write your code here
public class Main{
    public static void main(String[] args){
        Scanner in = new Scanner(System.in);
        Main m = new Main();
        while(in.hasNext()){
            String[] str = in.nextLine().split(" ");
            String word1 = str[0];
            String word2 = str[1];
            int min = m.minDistance(word1,word2);
            System.out.println(min);
        }
    }
    public  int minDistance(String word1,String word2){
        int len1 = word1.length();
        int len2 = word2.length();
        int[][] dp = new int[len1+1][len2+1];
        for(int i =0;i<=len1;i++){
            dp[i][0] = i;
        }
        for(int j =0;j<= len2;j++){
            dp[0][j] = j;
        }
        for(int i =0;i< len1;i++){
            char ch1 = word1.charAt(i);
            for(int j =0;j< len2;j++){
                char ch2 = word2.charAt(j);
                if(ch1 == ch2){
                    dp[i+1][j+1] = dp[i][j];
                }else{
                    int replace = dp[i][j] +1;// ch1 代替 ch2
                    int insert = dp[i][j+1] + 1;// ch2 插入到 ch1 前面的位置
                    int delete = dp[i+1][j] + 1;// 删除ch2
                    int min =replace>insert?insert:replace;
                    min = min>delete?delete:min;
                    dp[i+1][j+1] = min;
                }
            }
        }
        return dp[len1][len2];
    }
}

发表于 2016-03-23 19:23:57 回复(0)
python可以过呀
def shortestDistance(s1, s2):
    l1, l2 = len(s1) + 1, len(s2) + 1
    a, b = '0' + s1, '0' + s2
    dp = [[0] * l2 for i in range(l1)]
    for i in range(1, l1): dp[i][0] = i
    for i in range(1, l2): dp[0][i] = i
    '''
    a[i]添加之后相同,需要a[1~i]=b[1~j-1],dp[i][j]=dp[i][j-1]+1
    a[i]删除之后相同,需要a[1~i-1]=b[1~j],dp[i][j]=dp[i-1][j]+1
    a[i]修改之后相同,需要a[1~i-1]=b[1^j-1],dp[i][j]=dp[i-1][j-1]+1
    也可能a[i]不需要修改,就能a[1~i]=b[1~j],dp[i][j]=dp[i-1][j-1]
    '''
    for i in range(1, l1):
        for j in range(1, l2):
            dp[i][j] = min(dp[i][j - 1] + 1, dp[i - 1][j] + 1,
                           dp[i - 1][j - 1] + int(a[i] != b[j]))
    return dp[l1 - 1][l2 - 1]

import sys
x = sys.stdin.read()[:-1]
xx = x.split("<br/>")
ans = ""
for i in xx:
    s1, s2 = i.split()
    ans += f"{shortestDistance(s1, s2)}<br/>"
sys.stdout.write(ans[:-5])


发表于 2021-01-28 14:09:29 回复(0)
使用动态规划。对于两个字符串来说,“ABC”中‘A’的最优匹配 会存在以下四种情况:
对于(1),   dp[i][j] = 1+dp[i-1][j] ;
对于(2), dp[i][j] = 1+dp[i][j-1] ;
对于(3), dp[i][j] = dp[i-1][j-1] ;
对于(4), dp[i][j] = 1+ dp[i-1][j-1] ;
编辑于 2019-04-03 15:41:17 回复(0)
#Q2:levenshtein distance:
            n =int(input('input n strings:'))
            list = []
            edit_distance = 0
            for i in range(n):
                #print('input string %d:'%(i+1))
                list.append( input('input string %d:'%(i+1)))
            target = input('the target string is:')
            for string1 in list:
                num = 1
                print(string1)
                print(target)
                if string1 != '' and target != '':
                    m = len(string1) #the horizontal axis
                    n = len(target) #the vertical axis
                    #initialize:
                    d_matrix  = np.mat(np.zeros((n+1,m+1)))
                    for i in range(m):
                        d_matrix[0,i+1] = i+1
                    for i in range(n):
                        d_matrix[i+1,0] = i+1
                    #print(d_matrix)
                    f = 1
                    j = 1
                    for cha_j in target:
                        i = 1
                        #print(d_matrix[1,5])
                        for cha_i in string1:
                            if cha_i == cha_j:
                                f = 0
                            else:
                                f = 1
                            d_matrix[j,i]=min(d_matrix[j,i-1]+1,d_matrix[j-1,i]+1,d_matrix[j-1,i-1]+f)
                            i += 1
                        j +=1
                    edit_distance = d_matrix[n,m]
                else:
                    if string1 =='' and target =='':
                        edit_distance = 0
                    elif string1 =='':
                        edit_distance = len(target)
                    else:
                        edit_distance = len(string)
                print("the edit distance of string %d: "%(num),edit_distance)
编辑于 2018-07-11 20:16:54 回复(0)
 #include<iostream>
#include<string>
#include<vector>
using namespace std;
int findMinCost(string A, int n, string B, int m, int c0, int c1, int c2) {
        // write code here
        vector<vector<int > > dp(n+1,vector<int >(m+1));
        dp[0][0]=0;
        for(int i=1;i<n+1;i++)
            dp[i][0]=dp[i-1][0]+c1;
        for(int j=1;j<m+1;j++)
            dp[0][j]=dp[0][j-1]+c0;
        for(int i=1;i<n+1;i++)
            for(int j=1;j<m+1;j++)
                if(A[i-1]==B[j-1]) dp[i][j]=dp[i-1][j-1];
                else 
                {
                    int MIN=min(dp[i][j-1]+c0,dp[i-1][j-1]+c2);
                     dp[i][j]=min(dp[i-1][j]+c1,MIN);
                }
        return dp[n][m];
        
        
    }

int main(){
    
    string A;
    string B;
    while(cin>>A>>B)
    {
        cout<<findMinCost(A,A.size(),B,B.size(),1,1,1)<<endl;
    }
    
    return 0;
} 

发表于 2018-02-28 17:02:38 回复(0)
#include <bits/stdc++.h>
using namespace std;

int main() {
    string a, b;
    while (cin >> a >> b) {
        int i, j, alen = a.size(), blen = b.size();
        int dp[1024][1024] = {0};
        for (i = 0; i < alen; ++i) {
            if (a[i] == b[0]) {
                dp[i][0] = dp[i - 1][0];
                for (i += 1; i < alen; ++i) {
                    dp[i][0] = dp[i - 1][0] + 1;
                }
            }
            else
                dp[i][0] = i + 1;
        }
        for (i = 0; i < blen; ++i) {
            if (b[i] == a[0]) {
                dp[0][i] = dp[0][i - 1];
                for (i += 1; i < blen; ++i) {
                    dp[0][i] = dp[0][i - 1] + 1;
                }
            }
            else
                dp[0][i] = i + 1;
        }
        for (i = 1; i < alen; ++i) {
            for (j = 1; j < blen; ++j) {
                dp[i][j] = min(dp[i - 1][j - 1] + (a[i] != b[j] ? 1 : 0), min(dp[i][j - 1], dp[i - 1][j]) + 1);
            }
        }
        cout << dp[alen - 1][blen - 1] << endl;
    }
    return 0;
}

发表于 2018-02-22 09:29:28 回复(0)
动态规划问题:
import java.io.IOException;
import java.util.Scanner;

public class Levenshtein {
    public static void main(String[] args) throws IOException {
        Scanner sc = new Scanner(System.in);
        while (sc.hasNext()) {
            String line = sc.nextLine();
            String s1 = line.split(" ")[0];
            String s2 = line.split(" ")[1];
            int[][] dis = new int[s1.length() + 1][s2.length() + 1];
            for (int i = 0; i <= s1.length(); i++) {
                dis[i][0] = i;
            }
            for (int j = 0; j <= s2.length(); j++) {
                dis[0][j] = j;
            }
            for (int i = 1; i <= s1.length(); i++) {
                for (int j = 1; j <= s2.length(); j++) {
                    dis[i][j] = Math.min(dis[i][j - 1] + 1, Math.min(dis[i - 1][j] + 1,
                            dis[i - 1][j - 1] + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1)));
                }
            }
            System.out.println(dis[s1.length()][s2.length()]);
        }

    }
}

发表于 2018-01-26 10:21:25 回复(0)
import java.util.Arrays;
import java.util.Scanner;

/**
 * Created by Olive on 2017/9/7.
 * UNIX系统下有一个行编辑器ed,它每次只对一行文本做删除一个字符、插入一个字符
 * 或替换一个字符三种操作。例如某一行的内容是“ABC”,经过把第二个字符替换成“D”、
 * 删除第一个字符、末尾插入一个字符“B”,这三步操作后,内容就变成了“DCB”。
 * 即“ABC”变成“DCB”需要经过3步操作,我们称它们的编辑距离为3
 */
public class Main {
    public static void main(String[] args){
        Scanner in = new Scanner(System.in);
        while(in.hasNext()){
            String m = in.next();
            String n = in.next();
            System.out.println(distance(m, n));
        }
    }

    public static int distance(String str1, String str2){
        if(str1 == null || str2 == null || str1.length() == 0|| str2.length() == 0)
            return 0;

        int m = str1.length();
        int n = str2.length();
        // dp[i][j]代表从str1的第1-i字符转换为str2的第1-j字符的最短编辑距离,第0字符为''
        int[][] dp = new int[m + 1][n + 1];
        for(int i = 1; i <= m; i++){
            dp[i][0] = dp[i - 1][0] + 1;
        }
        for(int j = 1; j <= n; j++){
            dp[0][j] = dp[0][j - 1] + 1;
        }
        for(int i = 1; i <= m; i++){
            for(int j = 1; j <= n; j++){
                if(str1.charAt(i - 1) == str2.charAt(j - 1))
                    dp[i][j] = dp[i - 1][j - 1];
                else
                    dp[i][j] = Math.min(Math.min(dp[i][j - 1], dp[i - 1][j]), dp[i - 1][j - 1]) + 1;
            }
        }
        return dp[m][n];
    }
}


编辑于 2017-09-07 13:48:41 回复(0)
好奇怪,我把dp[][]数组设定为dp[1000][1000]段错误,设定为dp[1500][1500]段错误
设定为dp[1100][1100]就对了
#include <iostream>
#include <string>

using namespace std ;
string str1, str2;

int main(   )
{
	//cout << "输入两个字符串" << endl;

	while(cin >> str1>> str2 ){
        
     //表示x0~xi-1与y0~yi-1的最短编辑距离
	int dp[1100][1100];

	for (int i = 0; i <= str2.size(); ++i)
		dp[0][i] = i ;
	for (int i = 0; i <= str1.size(); ++i)
		dp[i][0] = i ;

	for(  int i=1; i<=str1.size(); ++i )
		for (int j = 1; j <= str2.size(); ++j) {
			int cost;
			if (str1[i - 1] == str2[j - 1])
				cost = 0;
			else
				cost = 1;
			int min;
			if (dp[i - 1][j - 1] + cost < dp[i - 1][j] + 1)
				min = dp[i - 1][j - 1] + cost;
			else
				min = dp[i - 1][j]+1 ;//删除xi
			if (dp[i][j - 1] + 1 < min)//添加yj
				min = dp[i][j - 1] + 1;
			dp[i][j] = min;
		}
		cout << dp[str1.size()][str2.size()] << endl ;   
    }
		


	return 0 ;
}

发表于 2017-04-14 10:08:50 回复(0)
#include <iostream>
#include <vector>
#include <string>
using namespace std;
int main()
{
std::string s1;
std::string s2;
int distance[1025][1025];
while(cin>>s1>>s2)
{

for(int i = 0; i<= s1.size();++i)
{
distance[i][0] = i;                      
}
for(int i = 0; i<= s2.size();++i)
{
distance[0][i] = i;                      
}
for(int i = 0; i<s1.size();++i )
{
for(int j = 0; j< s2.size();++j )
{
int cost = s1[i]==s2[j]? 0 :1;
distance[i+1][j+1]= min( min(distance[i][j+1]+1,
distance[i+1][j]+1 ),
distance[i][j] + cost );                          
}

}
cout<<distance[s1.size()][s2.size()]<<std::endl; 

}

}
发表于 2017-03-24 21:02:15 回复(0)
import java.util.Scanner;
public class Main {
	public static void main(String[] args) {
		Scanner sc = new Scanner(System.in);
		while (sc.hasNext()) {
			String m = sc.next();
			String n = sc.next();
			int[][] dp = new int[m.length() + 1][n.length() + 1];
			for (int i = 0; i < dp.length; i ++ )
				dp[i][0] = i;
			for (int i = 0; i < dp[0].length; i ++ )
				dp[0][i] = i;
			for (int i = 1; i < dp.length; i ++ ) {
				for (int j = 1; j < dp[0].length; j ++ ) {
					dp[i][j] = m.charAt(i - 1) == n.charAt(j - 1) ? dp[i - 1][j - 1] : Math.min(dp[i - 1][j - 1] + 1, Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1));
				}
			}
			System.out.println(dp[m.length()][n.length()]);
		}
	}
}

发表于 2016-10-12 02:17:22 回复(0)
 import java.util.Scanner;


public class Main {
 public static void main(String[] args)
  {
 Scanner sc=new Scanner(System.in);
 while(sc.hasNext()){
 String line=sc.nextLine();
 String[]
  str=line.split(" ");
 int m=str[0].length();
 int
  n=str[1].length();
 
 int[][] matrix=new int[m+1][n+1];
 
 for(int i=1;i<=n;i++){
 matrix[0][i]=i;
 }
 
 for(int j=1;j<=m;j++){
 matrix[j][0]=j;
 }
 
 for(int i=1;i<=m;i++){
 for(int
  j=1;j<=n;j++){
 if(str[0].charAt(i-1)==str[1].charAt(j-1)){
 matrix[i][j]=matrix[i-1][j-1];
 }else{
 matrix[i][j]=Math.min(Math.min(matrix[i-1][j]+1,
  matrix[i][j-1]+1)
 , matrix[i-1][j-1]+1);
 }
 }
 }
 
 System.out.println(matrix[m][n]);
 }
 }
 
 }
 
 

编辑于 2016-09-06 11:36:12 回复(0)
这个题编辑代价都是一。另外附上左老师的视频讲解,http://www.nowcoder.com/discuss/1861。
发表于 2015-12-26 21:29:32 回复(0)