dfde-threads.py
von paedubucher
- SNIPPET_DESC:
- Threadinformationen auslesen
- SNIPPET_CREATION_TIME:
- 20.06.2022 21:14:34
- SNIPPET_PRUNE_TIME:
- Unendlich
- SNIPPET_TEXT:
-
- #!/usr/bin/env python3
- from datetime import datetime
- import re
- import requests
- import sys
def get_forum_text(forum_id):
    """Download the HTML of a debianforum.de forum overview page.

    Args:
        forum_id: Value inserted as the ``f`` query parameter of
            ``viewforum.php``.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with an HTTP error status.
    """
    url = f'https://debianforum.de/forum/viewforum.php?f={forum_id}'
    # requests has no default timeout; without one a stalled server would
    # block the script forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    return response.text
def group_lines_to_thread_sections(text):
    """Split forum HTML into one list of stripped lines per thread row.

    A new section starts on every line containing ``<li class="row bg1"``
    or ``<li class="row bg2"``; that marker line is the first element
    (index 0) of its section.

    Lines that precede the first marker belong to no thread and are
    discarded. Lines that follow the last thread cannot be told apart from
    its content here, so they remain part of the final section.

    Args:
        text: The page source as a single string.

    Returns:
        A list of sections, each a list of whitespace-stripped lines.
        Empty when ``text`` contains no marker line.
    """
    start = re.compile(r'<li class="row bg[12]"')
    threads = []
    current = None  # stays None until the first thread row has been seen
    for line in text.split('\n'):
        if start.search(line):
            current = []
            threads.append(current)
        if current is not None:
            current.append(line.strip())
    return threads
# Format of the timestamps shown in the forum listing; also used for output.
DATE_FMT = '%d.%m.%Y %H:%M:%S'

# field name -> (line offset within a thread section, pattern, converter).
# The offsets count from the section's marker line (offset 0).
# NOTE(review): they presumably mirror the forum's current HTML layout --
# re-check them if the output degrades to '???'.
FIELD_EXTRACTORS = {
    'title': (4, re.compile(r'class="topictitle">([^<]+)</a>'), str),
    'n_answers': (10, re.compile(r'<strong>([0-9]+)</strong>'), int),
    'starter': (13, re.compile(r'>([^<]+)</a>'), str),
    'date': (
        13,
        re.compile(r'(\d{2}\.\d{2}\.\d{4} \d{1,2}:\d{2}:\d{2})$'),
        lambda d: datetime.strptime(d, DATE_FMT),
    ),
}


def parse_thread_section(section):
    """Extract title, answer count, starter and date from one section.

    Args:
        section: List of stripped lines belonging to one thread row.

    Returns:
        A dict with the keys ``title``, ``n_answers``, ``starter`` and
        ``date``. A field whose line is missing (section too short) or
        whose pattern does not match is set to the string ``'???'``.
    """
    entry = {}
    for field, (lineno, pattern, convert) in FIELD_EXTRACTORS.items():
        # A section shorter than the expected offset counts as "no match"
        # instead of raising IndexError.
        match = pattern.search(section[lineno]) if lineno < len(section) else None
        entry[field] = convert(match[1]) if match else '???'
    return entry


def main(argv):
    """Print the threads of a forum, newest first.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the numeric forum id.

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    usage = f'usage: {argv[0]} [forum_id]'
    if len(argv) < 2:
        print(usage, file=sys.stderr)
        return 1
    try:
        forum_id = int(argv[1])
    except ValueError:
        # A non-numeric id is a usage error, not a reason for a traceback.
        print(usage, file=sys.stderr)
        return 1

    full_text = get_forum_text(forum_id)
    parsed = (parse_thread_section(sec)
              for sec in group_lines_to_thread_sections(full_text))
    # Only rows with a parsed date can be sorted; the rest are dropped.
    entries = [entry for entry in parsed if entry['date'] != '???']

    for e in sorted(entries, key=lambda e: e['date'], reverse=True):
        au = e['starter']
        at = e['date'].strftime(DATE_FMT)
        ti = e['title']
        an = e['n_answers']
        print(f'{au} ({at}): {ti} ({an} Antworten)')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Quellcode
Hier kannst du den Code kopieren und ihn in deinen bevorzugten Editor einfügen. PASTEBIN_DOWNLOAD_SNIPPET_EXPLAIN