NoPaste

huffman.py

von paedubucher
SNIPPET_DESC:
Huffman-Kodierung und -Dekodierung
SNIPPET_CREATION_TIME:
18.12.2022 19:36:06
SNIPPET_PRUNE_TIME:
Unendlich

SNIPPET_TEXT:
  1. #!/usr/bin/env python3
  2.  
  3. from collections import namedtuple
  4.  
  5. from bitstream import BitStream
  6.  
  7.  
  8. Node = namedtuple('Node', 'chars weight left right')
  9.  
  10. left = False
  11. right = True
  12.  
  13.  
  14. def count_frequencies(text):
  15.     freqs = {}
  16.     for c in text:
  17.         freqs[c] = freqs.get(c, 0) + 1
  18.     return [Node([c], n, None, None) for c, n in freqs.items()]
  19.  
  20.  
  21. def build_tree(freqs):
  22.     def sortkey(node):
  23.         return (node[1], node[0])
  24.     nodes = sorted(freqs, key=sortkey)
  25.     while (len(nodes) > 1):
  26.         a = nodes[0]
  27.         b = nodes[1]
  28.         c = Node(sorted(a.chars + b.chars), a.weight + b.weight, a, b)
  29.         nodes = sorted([c] + nodes[2:], key=sortkey)
  30.     return nodes[0]
  31.  
  32.  
  33. def encode(text, tree):
  34.     bits = BitStream()
  35.     for c in text:
  36.         bits.write(encode_char(c, tree))
  37.     return bits
  38.  
  39.  
  40. def encode_char(c, tree):
  41.     path = []
  42.     if len(tree.chars) == 1 and c == tree.chars[0]:
  43.         # leaf
  44.         return path
  45.     elif c in tree.chars:
  46.         # node
  47.         if c in tree.left.chars:
  48.             return path + [left] + encode_char(c, tree.left)
  49.         elif c in tree.right.chars:
  50.             return path + [right] + encode_char(c, tree.right)
  51.         else:
  52.             raise ValueError(f'{c} not found in left/right branch of {tree}')
  53.     else:
  54.         raise ValueError(f'{c} not found in {tree}')
  55.  
  56.  
  57. def decode(bits, tree):
  58.     text = ''
  59.     bits = bits.copy()
  60.     while len(bits):
  61.         char, bits = decode_next(bits, tree)
  62.         text += char
  63.     return text
  64.  
  65.  
  66. def decode_next(bits, tree):
  67.     if len(tree.chars) == 1:
  68.         # leaf
  69.         return tree.chars[0], bits
  70.     elif len(bits):
  71.         # node
  72.         bit = bits.read(bool, 1)[0]
  73.         if bit is left:
  74.             return decode_next(bits, tree.left)
  75.         else:
  76.             return decode_next(bits, tree.right)
  77.     else:
  78.         raise ValueError('bits consumed prematurely')
  79.  
  80.  
  81. if __name__ == '__main__':
  82.     text = 'abracadabra'
  83.     freqs = count_frequencies(text)
  84.     tree = build_tree(freqs)
  85.     encoded = encode(text, tree)
  86.     decoded = decode(encoded, tree)
  87.     print(f'Compressed "{text}" as {encoded} in {len(encoded)} bits.')
  88.     print(f'Decompressed {encoded} as "{text}".')

Quellcode

Hier kannst du den Code kopieren und ihn in deinen bevorzugten Editor einfügen. PASTEBIN_DOWNLOAD_SNIPPET_EXPLAIN