NoPaste

dfde-threads.py

von paedubucher
SNIPPET_DESC:
Threadinformationen auslesen
SNIPPET_CREATION_TIME:
20.06.2022 21:14:34
SNIPPET_PRUNE_TIME:
Unendlich

SNIPPET_TEXT:
  1. #!/usr/bin/env python3
  2.  
  3. from datetime import datetime
  4. import re
  5. import requests
  6. import sys
  7.  
  8.  
  9. def get_forum_text(forum_id):
  10.     url = f'https://debianforum.de/forum/viewforum.php?f={forum_id}'
  11.     return requests.get(url).text
  12.  
  13.  
  14. def group_lines_to_thread_sections(text):
  15.     start = re.compile(r'<li class="row bg[12]"')
  16.     buf = []
  17.     threads = []
  18.     for line in text.split('\n'):
  19.         if start.search(line):
  20.             if buf:
  21.                 threads.append(buf)
  22.                 buf = []
  23.         buf.append(line.strip())
  24.     threads.append(buf)
  25.     return threads
  26.  
  27.  
  28. if __name__ == '__main__':
  29.     if len(sys.argv) < 2:
  30.         print(f'usage: {sys.argv[0]} [forum_id]')
  31.         sys.exit(1)
  32.  
  33.     dev_forum_id = int(sys.argv[1])
  34.     full_text = get_forum_text(dev_forum_id)
  35.     thread_sections = group_lines_to_thread_sections(full_text)
  36.  
  37.     title_re = re.compile(r'class="topictitle">([^<]+)</a>')
  38.     answers_re = re.compile(r'<strong>([0-9]+)</strong>')
  39.     starter_re = re.compile(r'>([^<]+)</a>')
  40.     date_re = re.compile(r'(\d{2}\.\d{2}\.\d{4} \d{1,2}:\d{2}:\d{2})$')
  41.     date_fmt = '%d.%m.%Y %H:%M:%S'
  42.  
  43.     extract_from_lineno_re_conv = {
  44.         'title': (4, title_re, lambda t: t),
  45.         'n_answers': (10, answers_re, lambda n: int(n)),
  46.         'starter': (13, starter_re, lambda s: s),
  47.         'date': (13, date_re, lambda d: datetime.strptime(d, date_fmt)),
  48.     }
  49.  
  50.     entries = []
  51.     for sec in thread_sections:
  52.         entry = {}
  53.         for field, (n, r, f) in extract_from_lineno_re_conv.items():
  54.             m = r.search(sec[n])
  55.             entry[field] = f(m[1]) if m else '???'
  56.         if entry and 'date' in entry and entry['date'] != '???':
  57.             entries.append(entry)
  58.  
  59.     for e in sorted(entries, key=lambda e: e['date'], reverse=True):
  60.         au = e['starter']
  61.         at = e['date'].strftime(date_fmt)
  62.         ti = e['title']
  63.         an = e['n_answers']
  64.         print(f'{au} ({at}): {ti} ({an} Antworten)')

Quellcode

Hier kannst du den Code kopieren und ihn in deinen bevorzugten Editor einfügen.