我最近一直在研究一些压缩算法,但是在过去的几天里,我在 python 中实现 LZ78 时遇到了一些真正的麻烦。我在网上查找了一些示例,但还没有真正找到任何可靠的编码和解码输入的方法。有人有我可以查看的资源或可以检查的代码吗?
谢谢
这是两者的压缩和解压:
def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = ""
result = []
for c in uncompressed:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c
# Output the code for w.
if w:
result.append(dictionary[w])
return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
from cStringIO import StringIO
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
# use StringIO, otherwise this becomes O(N^2)
# due to string concatenation in a loop
result = StringIO()
w = compressed.pop(0)
result.write(w)
for k in compressed:
if k in dictionary:
entry = dictionary[k]
elif k == dict_size:
entry = w + w[0]
else:
raise ValueError('Bad compressed k: %s' % k)
result.write(entry)
# Add w+entry[0] to the dictionary.
dictionary[dict_size] = w + entry[0]
dict_size += 1
w = entry
return result.getvalue()
# How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print (compressed)
decompressed = decompress(compressed)
print (decompressed)
希望这有帮助
这是我在 python 3 中实现的来自 https://q4.github.io/thesis.pdf 的 LZW 压缩算法:
from typing import List
def LZW_compress(input:str)->List[int]:
if len(input) == 0:
return []
tree = {chr(65 + i):i for i in range(26)} # tree is initially filled with input alphabet.
output = []
w = ""
for x in input:
if w+x in tree.keys():
w += x
else:
output.append(tree[w])
tree[w+x] = len(tree)
w = x
output.append(tree[w])
return output
def LZW_decompress(input:List[int])->str:
tree = {i:chr(65 + i) for i in range(26)} # tree is initially filled with input alphabet.
output = ""
for i in range(len(input)):
w = tree[input[i]]
if i != 0:
tree[i+25] = tree[i+25] + w[0] # completing the incomplete assignment of prev. round.
w = tree[input[i]] # updating w in case: input[i] == i + 25
output += w
tree[i+26] = w # incomplete set. need to append the first character of the next phrase.
return output
# Example
plain = "HGHSHHBSYBVDHDHGKQQQAAAAAACCCCCCCCCCCCRASSFASASDKKSAHSVNFNNNNNNAHVJSV"
assert(plain == LZW_decompress(LZW_compress(plain)))