问题: 给定一个未排序元素列表,我们必须找到最长连续元素序列的长度。
预期时间复杂度: O(N)
例如: [4,7,1,100,28,2,3]
输出: 4 因为最长的连续元素序列是上面列表中的 [1,2,3,4]
我已经用 Python 编写了代码。我已经使用 set 和 map (defaultdict) 来解决这个问题。
from collections import defaultdict as maps
class Solution:
def longestConsecutive(self, nums: List[int]) -> int:
nums = set(nums); # to remove duplicates
n = len(nums);
if n == 0: return 0;
if n == 1: return 1;
present = maps(int);
# Inserting values into map (defaultdict) as key
for i in nums:
present[i] = 1; # to check for presence of element
i = 0; curr = 0; cnt, maxx = 0, 0; visset = {i for i in nums}
while n > 0:
#I had to use this while loop since the for-loop below will get interrupted if we make any changes to the iterating set.
cnt = 1;
for ind in visset:
curr = ind; n -= 1;
visset.remove(ind); break; #remove is O(1) complexity for set
# values less than curr::
curr, temp = curr, curr;
while n > 0:
if present[curr - 1]:
curr -= 1; n -= 1;
visset.remove(curr); cnt += 1; #remove is O(1) complexity for set
else:
break;
# values greater than curr::
while n > 0:
if present[temp + 1]:
temp += 1; n -= 1;
visset.remove(temp); cnt += 1; #remove is O(1) complexity for set
else:
break;
maxx = max(cnt, maxx);
maxx = max(cnt, maxx);
return maxx
谁能解释为什么这段代码的运行时间比预期的要高?
更新: 该程序的运行时间约为 7000 毫秒,如果我将 set 替换为字典并在 set 的情况下使用 del dictt[i] 而不是 remove(),运行时间将降至 2000ms
问题可能源于
for ind in visset
。我以前观察过这个(对于字典)但从来没有费心去衡量它。从集合中删除元素后迭代集合似乎会产生一些开销。
from time import time
s = set(range(1_000_000))
x = set(range(1_000_000))
start = time()
for i in range(100_000):
x.remove(i)
for i in s: break
print(time()-start) # 0.017419099807739258
start = time()
for i in range(100_000):
s.remove(i)
for i in s: break
print(time()-start) # 2.6430912017822266
也许不删除项目的解决方案会运行得更快:
def longestConsec(nums):
forward = dict(zip(nums,nums))
backward = dict(zip(nums,nums))
for n in nums:
f = forward.get(n+1,n)
b = backward.get(n-1,n)
forward[b] = f
backward[f] = b
return max(f-b+1 for f,b in backward.items())
你的在 leetcode 上通过了未修改的测试,但速度较慢。这是对相同代码的简化,它的性能比平均水平好一点:
class Solution:
def longestConsecutive(self, nums: List[int]) -> int:
present = set(nums)
curr = 0
cnt, maxx = 0, 0
while present:
cnt = 1
start = curr = present.pop()
while start - 1 in present:
start -= 1
cnt += 1
present.remove(start)
while curr + 1 in present:
curr += 1
cnt += 1
present.remove(curr)
maxx = max(cnt, maxx)
return maxx
你确实有很多不必要的数据结构,并且在甚至没有使用值的地方重复映射。我用一套替换了它们。
基于我们可以测试列表中是否有前一个数字和下一个数字这一事实,我们可以构建并跟踪项目的运行。
def longest_consecutive(numbers):
numbers = set(numbers)
best_length = 0
best_start = -1
for number in numbers:
## ----------------
## this run is/will already accounted for
## ----------------
if number - 1 in numbers:
continue
## ----------------
## ----------------
## find consecutive numbers
## ----------------
i = number
while i in numbers:
i += 1
## ----------------
## ----------------
## record the steak if better than what we have seen
## ----------------
if i - number > best_length:
best_length = i - number
best_start = number
## ----------------
return list(range(best_start, best_start + best_length))
基于
timeit
这将在大约1/4秒内完成一百万个随机整数的列表。
import timeit
setup = """
import random
magnitude = 1_000_000
my_list = [random.randint(1, magnitude) for _ in range(magnitude)]
def longest_consecutive(numbers):
numbers = set(numbers)
best_length = 0
best_start = -1
for number in numbers:
## ----------------
## this run is/will already accounted for
## ----------------
if number - 1 in numbers:
continue
## ----------------
## ----------------
## find consecutive numbers
## ----------------
i = number
while i in numbers:
i += 1
## ----------------
## ----------------
## record the steak if better than what we have seen
## ----------------
if i - number > best_length:
best_length = i - number
best_start = number
## ----------------
return list(range(best_start, best_start + best_length))
"""
print(timeit.timeit("longest_consecutive(my_list)", setup=setup, number = 100))
在我的笔记本电脑上,结果如下:
26.6412 seconds for 100 runs of a million random integers.