我正在学习动态编程,并且遇到了LCS(最长公共子序列)算法。
我已经在Python中实现了它的几个版本,以了解实现之间的区别以及它们如何执行。
这里是代码:
import time
import sys
sys.setrecursionlimit(50000)
def current_milli_time(): return time.time() * 1000
def memoize_decorator(fn):
cache = {}
def inner_fn(*args):
if args in cache:
return cache[args]
ret = fn(*args)
cache[args] = ret
return ret
inner_fn.__name__ = fn.__name__
inner_fn.__doc__ = fn.__doc__
return inner_fn
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
class TestORedAssertValue:
def __init__(self, *ored_assert_values):
self.__values = ored_assert_values
def assert_value(self, value):
for self_value in self.__values:
if self_value == value:
return True
return False
def values(self):
return list(self.__values)
def test_assert(label, value, assertValue, *args):
ok = False
assertToPrint = assertValue
if isinstance(assertValue, TestORedAssertValue):
ok = assertValue.assert_value(value)
assertToPrint = " OR ".join(map(str, list(assertValue.values())))
elif value == assertValue:
ok = True
if ok:
print('#', label, ' - ', bcolors.OKGREEN, 'ok', bcolors.ENDC, sep='')
else:
print('#', label, ' - args: ', ", ".join(map(str, args)), ', expected: ', assertToPrint,
', got: ', value, ' - ', bcolors.FAIL, 'fail', bcolors.ENDC, sep='')
def measure_time_decorator(fn):
def inner(*args):
start = current_milli_time()
ret = fn(*args)
end = current_milli_time()
print('time: ', end - start, ' ms', sep='', end=' - ')
return ret
inner.__name__ = fn.__name__
inner.__doc__ = fn.__doc__
return inner
def test(fn):
print()
print("Testing:", fn.__name__)
fn = measure_time_decorator(fn)
A = ['A', 'C', 'B', 'D', 'E', 'G', 'C', 'E', 'D', 'B', 'G']
B = ['B', 'E', 'G', 'C', 'F', 'E', 'U', 'B', 'K']
assertRes = ['B', 'E', 'G', 'C', 'E', 'B']
res = fn(A, B)
test_assert('testcase 1', res, assertRes, A, B)
A = ['A', 'B']
B = []
assertRes = []
res = fn(A, B)
test_assert('testcase 2', res, assertRes, A, B)
A = []
B = []
assertRes = []
res = fn(A, B)
test_assert('testcase 3', res, assertRes, A, B)
A = [1, 2]
B = [1, 2]
assertRes = [1, 2]
res = fn(A, B)
test_assert('testcase 4', res, assertRes, A, B)
A = [1, 2]
B = [1, 2]
assertRes = [1, 2]
res = fn(A, B)
test_assert('testcase 5', res, assertRes, A, B)
A = ['A', 'B', 'C', 'E', 'F', 'G', 'H', 'I', 'L']
B = ['A', 'B', 'C', 'E', 'F', 'G', 'H', 'I', 'L']
assertRes = ['A', 'B', 'C', 'E', 'F', 'G', 'H', 'I', 'L']
res = fn(A, B)
test_assert('testcase 6', res, assertRes, A, B)
A = [x for x in range(3000)]
B = A
assertRes = A
res = fn(A, B)
test_assert('testcase 7', res, assertRes, A, B)
A = [x for x in range(3000)]
B = list(reversed(A))
assertRes = TestORedAssertValue([0], [2999])
res = fn(A, B)
test_assert('testcase 8', res, assertRes, A, B)
def longest_common_subsequence(A, B):
N = len(A)
M = len(B)
res_matrix = [[[]] * (M + 1) for i in range(N + 1)]
for i in range(1, N + 1):
for j in range(1, M + 1):
if A[i - 1] == B[j - 1]:
res_matrix[i][j] = res_matrix[i - 1][j - 1] + [A[i - 1]]
else:
res_matrix[i][j] = res_matrix[i][j - 1] if (
len(res_matrix[i][j - 1])
>
len(res_matrix[i - 1][j])
) else res_matrix[i - 1][j]
return res_matrix[-1][-1]
def longest_common_subsequence_recursive_memoized(A, B):
N = len(A)
M = len(B)
if N <= 0 or M <= 0:
return []
res_matrix = [[[]] * M for i in range(N)]
@memoize_decorator
def recursion(i, j):
if i <= -1 or j <= -1:
return []
elif A[i] == B[j]:
res_matrix[i][j] = recursion(i - 1, j - 1) + [A[i]]
else:
prev1 = recursion(i - 1, j)
prev2 = recursion(i, j - 1)
res_matrix[i][j] = prev1 if (
len(prev1)
>
len(prev2)
) else prev2
return res_matrix[i][j]
recursion(N - 1, M - 1)
return res_matrix[-1][-1]
def longest_common_subsequence_recursive_memoized_mit(A, B):
N = len(A)
M = len(B)
if N <= 0 or M <= 0:
return []
res_matrix = [[None] * M for i in range(N)]
def lcs(i, j):
if i <= -1 or j <= -1:
return []
if res_matrix[i][j] == None:
if A[i] == B[j]:
res_matrix[i][j] = lcs(i - 1, j - 1) + [A[i]]
else:
prev1 = lcs(i - 1, j)
prev2 = lcs(i, j - 1)
res_matrix[i][j] = prev1 if (
len(prev1)
>
len(prev2)
) else prev2
return res_matrix[i][j]
return lcs(N - 1, M - 1)
if __name__ == "__main__":
test(longest_common_subsequence_recursive_memoized)
test(longest_common_subsequence_recursive_memoized_mit)
test(longest_common_subsequence)
print()
重要功能是:
longest_common_subsequence_recursive_memoized
:使用带递归的memoize_decorator
;
longest_common_subsequence_recursive_memoized_mit
:将递归与通过直接检查res_matrix
来实现的备忘录一起使用(此启发来自MIT讲座-> https://youtu.be/V5hZoJ6uK-s?t=3228);
longest_common_subsequence
:使用n * m
矩阵的动态编程实现,无需递归;
如果运行上面的代码(例如python longest_common_subsequence.py
),则会看到类似于以下内容的输出:
$ python longest_common_subsequence.py
Testing: longest_common_subsequence_recursive_memoized
time: 0.22802734375 ms - #testcase 1 - ok
time: 0.0048828125 ms - #testcase 2 - ok
time: 0.003173828125 ms - #testcase 3 - ok
time: 0.02099609375 ms - #testcase 4 - ok
time: 0.01806640625 ms - #testcase 5 - ok
time: 0.046875 ms - #testcase 6 - ok
time: 328.40087890625 ms - #testcase 7 - ok
time: 105788.96801757812 ms - #testcase 8 - ok
Testing: longest_common_subsequence_recursive_memoized_mit
time: 0.22607421875 ms - #testcase 1 - ok
time: 0.0048828125 ms - #testcase 2 - ok
time: 0.003173828125 ms - #testcase 3 - ok
time: 0.031005859375 ms - #testcase 4 - ok
time: 0.01416015625 ms - #testcase 5 - ok
time: 0.041015625 ms - #testcase 6 - ok
time: 255.93994140625 ms - #testcase 7 - ok
time: 26466.174072265625 ms - #testcase 8 - ok
Testing: longest_common_subsequence
time: 0.159912109375 ms - #testcase 1 - ok
time: 0.011962890625 ms - #testcase 2 - ok
time: 0.009033203125 ms - #testcase 3 - ok
time: 0.015869140625 ms - #testcase 4 - ok
time: 0.015869140625 ms - #testcase 5 - ok
time: 0.1279296875 ms - #testcase 6 - ok
time: 10227.974853515625 ms - #testcase 7 - ok
time: 9605.087158203125 ms - #testcase 8 - ok
[有趣的部分是testcase 8
。您可以看到,对于此测试用例,longest_common_subsequence_recursive_memoized
的性能较差(105788.96801757812 ms ~= 105.8 seconds
),而对于其他两个功能,其花费不超过30秒(longest_common_subsequence
是最佳的,大约需要10秒才能完成)。
我的问题是:为什么longest_common_subsequence_recursive_memoized
对于测试用例8表现不佳,而实现却与longest_common_subsequence_recursive_memoized_mit
十分相似?
尽管它仍然使用备忘录,而不是直接访问res_matrix
并从那里返回值,它使用了一个装饰器,该装饰器包装了递归函数以缓存计算结果,并在需要之前已经进行计算的情况下立即返回它们。
谢谢您的关注。
我的问题是:为什么longest_common_subsequence_recursive_memoized对于测试用例8而言,它的性能如此之差,而实现却相当类似于longest_common_subsequence_recursive_memoized_mit?
我发现两者之间的主要性能差异是MIT版本具有:
res_matrix = [[None] * M for i in range(N)]
...
if res_matrix[i][j] == None:
您使用时:
res_matrix = [[[]] * M for i in range(N)]
并且没有等效测试。如果我们修改您的文件以包含与MIT相同的初始化和测试,并用functools中Python自己的rlu_cache()
装饰,那么我们可以查询缓存,我们得到:
CacheInfo(hits=380620, misses=17610383, maxsize=128, currsize=128)
time: 19195.7060546875 ms - #testcase 8 - ok
将缓存大小从默认值128增加可以提高性能:
CacheInfo(hits=8988004, misses=9002999, maxsize=4096, currsize=4096)
time: 16645.57080078125 ms - #testcase 8 - ok
但是到一点都没有任何区别。