在python中实现LZ78压缩算法

Question

我最近一直在研究一些压缩算法，但是在过去的几天里，我在 python 中实现 LZ78 时遇到了一些真正的麻烦。我在网上查找了一些示例，但还没有真正找到任何可靠的编码和解码输入的方法。有人有我可以查看的资源或可以检查的代码吗？

谢谢

Answer 1

下面给出压缩和解压两个函数的实现（注意：这段代码实现的其实是 LZW，它是 LZ78 的一个变体）：

def compress(uncompressed):
    """Compress a string to a list of output symbols (LZW).

    The table starts with the 256 single characters chr(0)..chr(255), each
    mapped to itself; every new phrase gets the next integer code, starting
    at 256.  The output is therefore a mixed list: a phrase of length one is
    emitted as that character (a str), a longer phrase as its int code.

    Args:
        uncompressed: The text to compress.

    Returns:
        A list of output symbols (str for single characters, int for learned
        phrases).  An empty input gives an empty list.

    Raises:
        KeyError: If the input contains a character that is not in the
            initial table, i.e. outside chr(0)..chr(255).
    """

    # Build the dictionary.
    # `range` instead of `xrange`: `xrange` does not exist on Python 3,
    # while `range` works on both Python 2 and Python 3.
    dict_size = 256
    dictionary = {chr(i): chr(i) for i in range(dict_size)}

    w = ""  # longest phrase matched so far that is already in the table
    result = []
    for c in uncompressed:
        wc = w + c
        if wc in dictionary:
            # Keep extending the current match.
            w = wc
        else:
            # Emit the symbol for the longest known phrase ...
            result.append(dictionary[w])
            # ... add wc to the dictionary under the next free code ...
            dictionary[wc] = dict_size
            dict_size += 1
            # ... and restart matching from the character that broke the match.
            w = c

    # Output the code for w (the unfinished match, if any).
    if w:
        result.append(dictionary[w])
    return result


def decompress(compressed):
    """Decompress a list of output ks to a string (LZW).

    Expects the mixed symbol format in which a single character is
    represented by the character itself (a str in chr(0)..chr(255)) and a
    learned phrase by an int code, the first learned phrase being 256.

    Args:
        compressed: Iterable of output symbols.  It is only read; the
            caller's list is not modified.

    Returns:
        The decompressed string.  An empty input gives an empty string.

    Raises:
        ValueError: If a symbol is neither in the table nor the code that is
            about to be defined.
    """
    # Build the dictionary.
    # `range` instead of `xrange` so this runs on Python 3 as well.
    dict_size = 256
    dictionary = {chr(i): chr(i) for i in range(dict_size)}

    # Walk the input through an iterator instead of `pop(0)`: popping would
    # mutate the caller's list and costs O(n) on its own.
    symbols = iter(compressed)
    nothing = object()
    first = next(symbols, nothing)
    if first is nothing:
        return ""
    if first not in dictionary:
        # The first symbol can only be one of the initial single characters.
        raise ValueError('Bad compressed k: %s' % first)

    # Collect the pieces and join once at the end; repeated string
    # concatenation in the loop would be O(N^2).
    w = dictionary[first]
    pieces = [w]
    for k in symbols:
        if k in dictionary:
            entry = dictionary[k]
        elif k == dict_size:
            # The code refers to the phrase that is only being defined in
            # this very step; that phrase is w plus its own first character.
            entry = w + w[0]
        else:
            raise ValueError('Bad compressed k: %s' % k)
        pieces.append(entry)

        # Add w+entry[0] to the dictionary.
        dictionary[dict_size] = w + entry[0]
        dict_size += 1

        w = entry
    return "".join(pieces)


# How to use:
# Encode a sample string and show the resulting symbol list, then feed that
# list back to the decoder and show the recovered text.
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print(compressed)
decompressed = decompress(compressed)
print(decompressed)

希望这有帮助

Answer 2

这是我用 Python 3 实现的 LZW 压缩算法，算法描述来自 https://q4.github.io/thesis.pdf：

from typing import List

def LZW_compress(input: str, alphabet: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ") -> List[int]:
    """Compress *input* into a list of LZW codes.

    The code table is seeded with the symbols of *alphabet*: the symbol at
    index i gets code i.  Each new phrase learned while scanning the input
    gets the next free code, starting at len(alphabet).  A decoder has to
    start from the same table to reproduce the text.

    Args:
        input: Text to compress; every character must be in *alphabet*.
        alphabet: The distinct symbols that may occur in *input*.  Defaults
            to the 26 uppercase letters A-Z.

    Returns:
        The list of integer codes; an empty list for empty input.

    Raises:
        ValueError: If *alphabet* contains the same symbol twice (two
            symbols would then compete for codes with learned phrases).
        KeyError: If *input* contains a character that is not in *alphabet*.
    """
    if len(set(alphabet)) != len(alphabet):
        raise ValueError("alphabet must not contain duplicate symbols")
    if not input:
        return []

    # tree is initially filled with the input alphabet.
    tree = {symbol: code for code, symbol in enumerate(alphabet)}
    output = []
    w = ""  # longest phrase matched so far that is already in the table
    for x in input:
        candidate = w + x
        if candidate in tree:
            w = candidate
        else:
            output.append(tree[w])
            # len(tree) is the next unused code because codes are dense from 0.
            tree[candidate] = len(tree)
            w = x
    # Flush the phrase still being matched when the input ended.
    output.append(tree[w])
    return output


def LZW_decompress(input: List[int], alphabet: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ") -> str:
    """Rebuild the text from a list of LZW codes.

    The code table is seeded with the symbols of *alphabet* (the symbol at
    index i has code i) and must match the table the encoder started from.
    Every decoded phrase opens a new table entry whose last character is only
    known once the next phrase has been decoded, so each round first
    completes the entry opened by the previous round.

    Args:
        input: The integer codes to decode.
        alphabet: The distinct symbols of the initial table, in code order.
            Defaults to the 26 uppercase letters A-Z.

    Returns:
        The decoded text; an empty string for an empty code list.

    Raises:
        ValueError: If *alphabet* contains the same symbol twice.
        KeyError: If a code is not present in the table when it is read.
    """
    if len(set(alphabet)) != len(alphabet):
        raise ValueError("alphabet must not contain duplicate symbols")

    # tree is initially filled with the input alphabet.
    tree = dict(enumerate(alphabet))
    first_free = len(tree)  # code given to the first learned phrase

    # Collect phrases and join once; `output += w` in a loop is quadratic.
    phrases = []
    for position, code in enumerate(input):
        if position:
            # Complete the entry opened in the previous round by appending
            # the first character of the current phrase.  If `code` is that
            # very entry, its still-incomplete value already starts with the
            # right character, so the lookup below is valid either way.
            pending = first_free + position - 1
            tree[pending] += tree[code][0]
        # Read the phrase after the completion, in case code == pending.
        w = tree[code]
        phrases.append(w)
        # Incomplete entry: the first character of the next phrase is still
        # to be appended.
        tree[first_free + position] = w
    return "".join(phrases)

# Example: a round trip through the encoder and the decoder must give back
# the original text unchanged.
plain = "HGHSHHBSYBVDHDHGKQQQAAAAAACCCCCCCCCCCCRASSFASASDKKSAHSVNFNNNNNNAHVJSV"
assert LZW_decompress(LZW_compress(plain)) == plain

在python中实现LZ78压缩算法

问题描述投票：0回答：2

2个回答

最新问题

在python中实现LZ78压缩算法

问题描述 投票：0回答：2

2个回答

最新问题

问题描述投票：0回答：2