最长公共子串的 DP 记忆方法

问题描述 投票:0回答:9

任何人都可以提供两个字符串之间最长公共子字符串的记忆方法。我知道底部解决方案,但我无法以自上而下的方式思考。 预期时间复杂度-O(n^2)

algorithm dynamic-programming
9个回答
8
投票

自上而下的方法

#include <iostream>
#include <algorithm>
#include <cstring>
using namespace std;

string X, Y;             //input strings
int ans, dp[105][105];   // ans : answer

int LCS(int n, int m)    //our function return value of (n,m) state
{                        // so that we can use the result in (n+1,m+1) state
  if(n == 0 || m == 0) return 0;   //in case of match in (n+1,m+1) state
  if(dp[n][m] != -1) return dp[n][m];

  LCS(n-1,m);          //to visit all n*m states          (try example:  X:ASDF
  LCS(n,m-1);          // we call these states first                     Y:ASDFF)

  if(X[n-1] == Y[m-1])
  {

    dp[n][m] =  LCS(n-1,m-1) + 1;
    ans = max(ans, dp[n][m]);
    return dp[n][m];
  }
    return dp[n][m] = 0;
}

int main()
{
    int t; cin>>t;
    while(t--)
    {
      int n, m; cin>>n>>m;  //length of strings
      cin>>X>>Y;
      memset(dp, -1, sizeof dp);
      ans = 0;
      LCS(n, m);
      cout<<ans<<'\n';
    }
    return 0;
}

3
投票
Java Solution: 

class Solution {
public int findLength(int[] A, int[] B) {

    int[][] cache = new int[A.length][B.length];
    Arrays.stream(cache).forEach(a->Arrays.fill(a,-1));
    int[] res = new int[1];
    findLength(0, 0, A, B, cache, res);
    return res[0];
}

public static int findLength(int a, int b, int[] A, int[] B, int[][] cache, int[] res){
    
    if( a >= A.length || b >= B.length )
        return 0;
    
    if(cache[a][b] != -1){
        return cache[a][b];
    }
    if(A[a] == B[b]){   
        cache[a][b] = 1 + findLength(a+1,b+1,A,B,cache,res);
      //  remember you can not return here: why? see case: s1 = 1,2,3 s2=1,4,1,2,3
    }
    // try out other possiblities and update cache
    findLength(a+1,b,A,B,cache,res);

    findLength(a,b+1,A,B,cache,res);

    //you can avoid this and find max value at end in cache
    res[0] = Math.max(res[0],cache[a][b]);

    //at this point cache might have -1 or updated value, if its -1 make it to 0 as this location is visited and no common substring is there from here
    cache[a][b] = Math.max(0,cache[a][b]);
    
return cache[a][b];
}
}

1
投票

带有递归的记忆化采用自上而下的方法。 下面以使用 Cormen 的 DP 的 LCS 示例为例,描述其工作原理。

MEMOIZED-LCS-LENGTH(X,Y)
 m<-length[X]
 n<-length[Y]
for(i<-1 to m)
  do for(j<-1 to n)
        c[i,j]<- -1

for(i<-1 to m)
    c[i,0]<-0
for(j<-1 to n)
    c[0,j]<-0
return RECURSIVE-LCS-LENGTH(X,Y,1,1)


RECURSIVE-LCS-LENGTH(X,Y,i,j)
if(c[i,j]!=-1) 
  return c[i,j]
//Above 2 line fetches the result if already present, instead of computing it again.
if(x[i]==y[j]) 
  then c[i,j]<-RECURSIVE-LCS-LENGTH(X,Y,i+1,j+1)+1 
else 
     c1<- RECURSIVE-LCS-LENGTH(X,Y,i+1,j)
     c2<-RECURSIVE-LCS-LENGTH(X,Y,i,j+1)
       if(c1<c2)
         then c[i,j]<-c1
       else c[i,j]<-c2

return c[i,j]

1
投票

Python 中的递归加记忆化。请注意,此代码已被 Hackerearth 和 Geeksforgeeks 部分接受。对于较大的测试用例,它提供 MLE。

import sys
sys.setrecursionlimit(1000000)
maxlen=0
t=None
def solve(s1, s2, n, m):
   global maxlen, t
   if n<=0 or m<=0:
       return 0
   if t[n][m]!=-1:
       return t[n][m]
   if s1[n-1]==s2[m-1]:
       temp=1+solve(s1, s2, n-1, m-1)
       maxlen=max(maxlen, temp)
       t[n][m]=temp
       return temp
   t[n][m]=0
   return 0
class Solution:
   def longestCommonSubstr(self, S1, S2, n, m):
       global maxlen, t
       maxlen=0
       t=[[-1]*(m+1) for i in range(n+1)]
       for i in range(n+1):
           for j in range(m+1):
               solve(S1, S2, i, j)
       return maxlen
if __name__=='__main__':
   S1=input().strip()
   S2=input().strip()
   n=len(S1)
   m=len(S2)
   ob = Solution()
   print(ob.longestCommonSubstr(S1, S2, n, m))

0
投票

下面描述了一个简单的解决方案。这里

memo[n][m]
不存储长度 最大子串,但您可以将最大子串存储在指针 maxi 中,如下所示:

 #include<iostream>
 #include<string>
 using namespace std;
 int lcs(string X,string Y,int n,int m,int *maxi,int memo[][8]) {


if(n==0||m==0) {

return 0;
}
int k=0;
int j=0;

if(memo[n-1][m-1]!=-1) {
return memo[n-1][m-1];
}
if(X[n-1]==Y[m-1]) {

memo[n-1][m-1] =  1+lcs(X,Y,n-1,m-1,maxi,memo);
if(*maxi<memo[n-1][m-1]) 
*maxi=memo[n-1][m-1];

}
else {
memo[n-1][m-1]=0;
}


int l =  lcs(X,Y,n-1,m,maxi,memo);
int i = lcs(X,Y,n,m-1,maxi,memo);

return memo[n-1][m-1];



}

int main() 
{ 
int n,m; 

string X = "abcdxyze"; 
//string X = "abcd";
string Y = "xyzabcde"; 

 n=X.length(); 
 m=Y.length(); 
 int memo[n][8];
 for(int i=0;i<n;i++) {
  for(int j=0;j<m;j++) {
  memo[i][j]=-1;
  }
 }
 int maxi=0;
 int k = lcs(X,Y,n,m,&maxi,memo); 
 cout << maxi;
  return 0; 
 } 

0
投票
class Solution {
public:
    int t[1005][1005];
    int maxC = 0;
    int recur_memo(vector<int>& nums1, vector<int>& nums2, int m, int n) {
        if(t[m][n] != -1)
            return t[m][n];
    
        if(m == 0 || n == 0)
            return 0;
    
        int max_substring_ending_here = 0;
        //Example : "abcdezf"   "abcdelf"
        //You see that wowww, string1[m-1] = string2[n-1] = 'f' and you happily   
        go for (m-1, n-1)
        //But you see, in future after a gap of 'l' and 'z', you will find 
        "abcde" and "abcde"
        if(nums1[m-1] == nums2[n-1]) {
            max_substring_ending_here = 1 + recur_memo(nums1, nums2, m-1, n-1);
        }
    
        //May be you find better results if you do (m-1, n) and you end up 
        updating maxC with some LAAARGEST COMMON SUBSTRING LENGTH
        int decrease_m = recur_memo(nums1, nums2, m-1, n); //stage (m-1, n)
    
        //OR,
        //May be you find better results if you do (m, n-1) and you end up 
        updating maxC with some LAAARGEST COMMON SUBSTRING LENGTH
        int decrease_n  = recur_memo(nums1, nums2, m, n-1); //stage (m, n-1)
    
        //Like I said, you need to keep on finding the maxC in every call you 
        make throughout your journey.
        maxC = max({maxC, max_substring_ending_here, decrease_m, decrease_n});
    
    
        //BUT BUT BUT, you need to return the best you found at this stage (m, n)
        return t[m][n] = max_substring_ending_here;
    }

    int findLength(vector<int>& nums1, vector<int>& nums2) {
        int m = nums1.size();
        int n = nums2.size();
        memset(t, -1, sizeof(t));
        recur_memo(nums1, nums2, m, n); //resurive+memoization
        return maxC;
    }
};

链接:https://leetcode.com/problems/maximum-length-of-repeated-subarray/discuss/1169215/(1)-Recursive%2BMemo-(2)-Bottom-Up-(C%2B%2B )


0
投票
class Solution{
private int ans = Integer.MIN_VALUE;

public int dfs(int i, int j, String str1, String str2, int[][]dp){
    if(i < 0 || j < 0){
        return 0;
    }
    if(dp[i][j] != -1){
        return dp[i][j];
    }
    if(str1.charAt(i) == str2.charAt(j)){
        dp[i][j] = 1 + dfs(i - 1, j - 1, str1, str2, dp);
        ans = Math.max(ans, dp[i][j]);
    }else{
        dp[i][j] = 0 ;
    }
    dfs(i - 1, j, str1, str2, dp);
    dfs(i, j - 1, str1, str2, dp);
    return dp[i][j];
}

int longestCommonSubstr(String S1, String S2, int n, int m){
    int[][]dp = new int[n][m];
    for(int[]row : dp){
        Arrays.fill(row, -1);
    }
    dfs(n - 1, m - 1, S1, S2, dp);
    return ans == Integer.MIN_VALUE ? 0 : ans;
}

}


-1
投票

这是一种递归和自上而下的方法:

       public int lcsSubstr(char[] s1, char[] s2, int m, int n, int c) {
            if (m == 0 || n == 0) {
                return c;
            }
            if (s1[m-1] == s2[n-1]) {
                c = lcsSubstr(s1, s2, m-1, n-1, c+1);
            } else {
                c2 = Math.max(lcsSubstr(s1, s2, m, n - 1, 0), lcsSubstr(s1, s2, m-1, n, 0));
            }
            return Math.max(c, c2);
        }

       public int lcsSubstrMemo(char[] s1, char[] s2, int m, int n, int c, int[][] t) {
            if(m == 0 || n == 0) {
                return c;
            }
            if (t[m-1][n-1] != -1) return t[m-1][n-1];
            if(s1[m - 1] == s2[n - 1]) {
                c = lcsSubstr(s1, s2, m - 1, n - 1, c + 1);
            } else {
                c2 = Math.max(lcsSubstr(s1, s2, m, n - 1, 0), lcsSubstr(s1, s2, m - 1, n, 0));
            }
            t[m - 1][n - 1] = Math.max(c, c2);
            return t[m-1][n-1];
        }

-2
投票

记忆化是指缓存子问题的解决方案以便以后使用。在最长公共子序列问题中,您尝试匹配两个子序列的子字符串以查看它们是否匹配,并在内存中维护已找到的最长子序列。这是您正在寻找的 Java 解决方案(LCS 的记忆版本):

    public class LongestCommonSubsequence {

    private static HashMap<Container, Integer> cache = new HashMap<>();
    private static int count=0, total=0;
    public static void main(String sargs[]){
        Scanner scanner = new Scanner(System.in);
        String x=scanner.nextLine();
        String y=scanner.nextLine();
        int max=0;
        String longest="";
        for(int j=0;j<x.length();j++){
            String common=commonSubsequence(j,0, x, y);
            if(max<common.length()){
                max=common.length();
                longest=common;
            }
        }
        for(int j=0;j<y.length();j++){
            String common=commonSubsequence(j,0, y, x);
            if(max<common.length()){
                max=common.length();
                longest=common;
            }
        }
        System.out.println(longest);
        System.out.println("cache used "+count+" / "+total);
    }
    public static String commonSubsequence(final int startPositionX, int startPositionY, String x, String y){
        StringBuilder commonSubsequence= new StringBuilder();
        for(int i=startPositionX;i<x.length();i++){
            Integer index=find(x.charAt(i),startPositionY,y);
            if(index!=null){
                commonSubsequence.append(x.charAt(i));              
                if(index!=y.length()-1)
                    startPositionY=index+1;
                else 
                    break;
            }               
        }
        return commonSubsequence.toString();        
    }
    public static Integer find(char query, int startIndex, String target){
        Integer pos=cache.get(new Container(query, startIndex));
        total++;
        if(pos!=null){
            count++;
            return pos;
        }else{
            for(int i=startIndex;i<target.length();i++){
                if(target.charAt(i)==query){
                    cache.put(new Container(query, startIndex), i);
                    return i;
                }                   
            }
            return null;
        }           
        }
        }
    class Container{
            private Character toMatch;
            private Integer indexToStartMatch;

        public Container(char t, int i){
        toMatch=t;
        indexToStartMatch=i;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime
                * result
                + ((indexToStartMatch == null) ? 0 : indexToStartMatch
                        .hashCode());
        result = prime * result + ((toMatch == null) ? 0 : toMatch.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        Container other = (Container) obj;
        if (indexToStartMatch == null) {
            if (other.indexToStartMatch != null)
                return false;
        } else if (!indexToStartMatch.equals(other.indexToStartMatch))
            return false;
        if (toMatch == null) {
            if (other.toMatch != null)
                return false;
        } else if (!toMatch.equals(other.toMatch))
            return false;
        return true;


        }       
    }
© www.soinside.com 2019 - 2024. All rights reserved.