#!/usr/bin/env python3
"""Print the threads of a debianforum.de forum, newest first.

The forum index page is downloaded, cut into one section of HTML lines per
thread, and the title, answer count, thread starter and date are read from
fixed line offsets inside each section.

NOTE(review): this file was received with its line structure collapsed and
with markup-like text missing from some string literals.  Every spot that
had to be reconstructed carries a ``NOTE(review)`` comment and must be
checked against the original script and the live forum markup.
"""

from datetime import datetime
import re
import sys

# Placeholder stored for a field that could not be extracted.
MISSING = '???'

# NOTE(review): only the tail ``([^<]+)`` of the title pattern survived;
# presumably it was anchored on the end of the topic link's opening tag.
# The leading ``>`` is the weakest anchor consistent with that -- confirm.
title_re = re.compile(r'>([^<]+)')
# NOTE(review): the three patterns below are reproduced exactly as received;
# they may have lost leading/trailing markup as well -- confirm.
answers_re = re.compile(r'([0-9]+)')
starter_re = re.compile(r'>([^<]+)')
date_re = re.compile(r'(\d{2}\.\d{2}\.\d{4} \d{1,2}:\d{2}:\d{2})$')
date_fmt = '%d.%m.%Y %H:%M:%S'


def _parse_date(text):
    """Convert a date string in ``date_fmt`` into a ``datetime``."""
    return datetime.strptime(text, date_fmt)


# field name -> (line offset inside a thread section, pattern, converter)
extract_from_lineno_re_conv = {
    'title': (4, title_re, str),
    'n_answers': (10, answers_re, int),
    'starter': (13, starter_re, str),
    'date': (13, date_re, _parse_date),
}


def get_forum_text(forum_id, *, timeout=30):
    """Download the index page of forum *forum_id* and return it as text.

    *timeout* is the number of seconds to wait for the server; without it a
    stalled connection would block the script forever.
    """
    # Imported here so the parsing helpers stay importable without the
    # third-party package.
    import requests

    url = f'https://debianforum.de/forum/viewforum.php?f={forum_id}'
    return requests.get(url, timeout=timeout).text


def group_lines_to_thread_sections(text):
    """Split *text* into sections, each a list of lines.

    A new section begins at every line matching the start pattern and runs
    up to (not including) the next such line.  Lines before the first match
    are dropped.  Returns a list of lists of strings.

    NOTE(review): the original body of this function and its start pattern
    were lost.  The grouping below is inferred from how the sections are
    consumed (indexed by line offset); the pattern assumes each thread row
    opens with ``<li class="row ...">`` -- confirm against the forum markup.
    """
    start = re.compile(r'<li class="row')
    sections = []
    current = None
    for line in text.splitlines():
        if start.search(line):
            current = [line]
            sections.append(current)
        elif current is not None:
            current.append(line)
    return sections


def extract_entry(section):
    """Extract all configured fields from one thread section.

    Returns a dict with one key per field in ``extract_from_lineno_re_conv``.
    A field whose line is absent, whose pattern does not match, or whose
    value cannot be converted is set to ``MISSING`` instead of raising.
    """
    entry = {}
    for field, (lineno, regex, convert) in extract_from_lineno_re_conv.items():
        # Sections that are not thread rows may be shorter than the offsets.
        match = regex.search(section[lineno]) if lineno < len(section) else None
        if match is None:
            entry[field] = MISSING
            continue
        try:
            entry[field] = convert(match[1])
        except ValueError:
            # e.g. a date that looks right but does not exist (31.02.).
            entry[field] = MISSING
    return entry


def parse_entries(thread_sections):
    """Return the entries of all sections for which a date could be parsed.

    Sections without a usable date are skipped; they cannot be sorted and
    are taken not to be thread rows.
    """
    candidates = (extract_entry(section) for section in thread_sections)
    return [entry for entry in candidates if entry['date'] != MISSING]


def format_entry(entry):
    """Render one entry as a single output line."""
    au = entry['starter']
    at = entry['date'].strftime(date_fmt)
    ti = entry['title']
    an = entry['n_answers']
    return f'{au} ({at}): {ti} ({an} Antworten)'


def format_threads(thread_sections):
    """Return the output lines for *thread_sections*, newest thread first."""
    entries = parse_entries(thread_sections)
    entries.sort(key=lambda e: e['date'], reverse=True)
    return [format_entry(entry) for entry in entries]


def main(argv=None):
    """Fetch a forum page and print its threads; return the exit status.

    NOTE(review): how the original obtained the forum id was lost; taking it
    from the first command-line argument is an assumption based on the
    otherwise unused ``sys`` import -- confirm.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        print(f'usage: {sys.argv[0]} FORUM_ID', file=sys.stderr)
        return 2
    thread_sections = group_lines_to_thread_sections(get_forum_text(args[0]))
    for line in format_threads(thread_sections):
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main())