import re
import sys
from timeit import timeit
from typing import Iterator

import bs4
import requests

# URL of the "Netzwerk" (network) subforum whose thread list is scraped.
FORUM_URL = "https://debianforum.de/forum/viewforum.php?f=30"

# Seconds to wait for the forum server before giving up; without a timeout
# requests.get() may block forever on an unresponsive server.
REQUEST_TIMEOUT = 30

# Number of complete parse runs performed in "timeit" mode. timeit's default
# of 1,000,000 iterations is impractical for a full HTML parse.
TIMEIT_RUNS = 100

# A digit followed by 18 characters drawn from digits, '.', ':' and ' '
# (19 characters in total) -- presumably a "DD.MM.YYYY HH:MM:SS" timestamp;
# verify against the forum's actual markup.
_DATE_RE = re.compile(r"[0-9][0-9.: ]{18}")

# HTML of the "Netzwerk" subforum, fetched once at module load.
dfde = requests.get(FORUM_URL, timeout=REQUEST_TIMEOUT).text


def parse(html: str) -> Iterator[str]:
    """Yield one semicolon-separated record per thread row found in *html*.

    Each ``ul.topiclist li.row`` element produces the string
    ``"<title>;<date>;<username>;<answers>"`` where

    * title is the text of the first ``a.topictitle`` element,
    * date is the first ``_DATE_RE`` match in the ``div.topic-poster`` text,
    * username is the text of the first link inside ``div.topic-poster``,
    * answers is the first space-separated token of the ``dd.posts`` text
      (presumably the reply count -- confirm against the page markup).

    Args:
        html: Markup of a forum thread listing page.

    Yields:
        One record string per thread row, in document order.

    Raises:
        IndexError: If a row lacks one of the selected elements.
        AttributeError: If no date matches in the poster text, or the row
            has no ``dd`` element with class ``posts``.
    """
    # Parse the argument, not the module-level ``dfde``; the original ignored
    # its parameter, so callers could never parse any other document.
    soup = bs4.BeautifulSoup(html, "html.parser")
    for thread in soup.select("ul.topiclist li.row"):
        title = thread.select("a.topictitle")[0].text
        poster_text = thread.select("div.topic-poster")[0].text
        date = _DATE_RE.search(poster_text).group(0)
        username = thread.select("div.topic-poster a")[0].text
        answers = thread.find("dd", class_="posts").text
        yield ";".join([title, date, username, answers.split(" ")[0]])


def main() -> None:
    """Print the parsed threads, or a timing result if "timeit" is in argv."""
    if "timeit" in sys.argv:
        # parse() is a generator function: merely calling it creates a
        # generator object and runs none of the body. Drain it with list()
        # so the measurement covers the actual parsing work.
        timing_results = timeit(lambda: list(parse(dfde)), number=TIMEIT_RUNS)
        # Total wall-clock seconds for TIMEIT_RUNS complete parses.
        print(timing_results)
    else:
        for thread in parse(dfde):
            print(thread)


if __name__ == "__main__":
    main()