Spendenstatistiken
von TRex
-
# coding: utf-8
# In[1]:
years = range(2003, 2016)
donations_url = "https://wiki.debianforum.de/Debianforum.de/Spenden"
expenses_url = "https://wiki.debianforum.de/Debianforum.de/Ausgaben"

# In[2]:
import urllib
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2


def _fetch(url):
    """Download *url* and return the raw response body.

    The response is wrapped in ``closing()`` so the connection is released
    even on Python 2, where the object returned by ``urlopen`` is not a
    context manager.
    """
    with closing(urlopen(url)) as response:
        return response.read()


# Every year except the last one in ``years`` is fetched from its own
# sub-page (<donations_url>/<year>); the last year is fetched from the main
# donations page itself.
urls = {
    year: (donations_url + '/' + str(year) if year < years[-1] else donations_url)
    for year in years
}

# Raw page bodies of the donation pages, keyed by year.
y = {year: _fetch(url) for year, url in urls.items()}

# Raw page body of the single expenses page.
expense_html = _fetch(expenses_url)
- # In[3]:
def parse_html_to_list(html):
    """Parse the HTML tables of a page into a list of ``(date, note, amount)`` tuples.

    Only table rows with exactly three cells (date, note, amount) are
    considered, and the very first row found on the page is always skipped.
    Rows whose first cell does not contain a ``DD.MM.YYYY`` date (for example
    header rows) are skipped as well.

    Args:
        html: UTF-8 encoded page source as a byte string.

    Returns:
        A list of ``(datetime, note, amount)`` tuples in document order.
        ``note`` is the first link text of the note cell, or the first text
        node if the cell has no link, or ``""`` if the cell has no text at
        all. ``amount`` is a float; a decimal comma is accepted.

    Raises:
        ValueError: if the amount cell of a dated row is not a number.
    """
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html.decode('utf-8')), parser)

    data = []
    for index, row in enumerate(tree.xpath('//table//tr')):
        cells = list(row)
        if index == 0 or len(cells) != 3:
            continue

        # A cell without leading text has ``.text`` set to None; treat it as
        # "no date" so the row is skipped instead of crashing.
        date_text = (cells[0].text or "").strip()
        try:
            when = datetime.strptime(date_text, "%d.%m.%Y")
        except ValueError:
            # Not a data row (e.g. a header line).
            continue

        # Prefer the link text; fall back to any text inside the cell.
        note = cells[1].xpath(".//a/text()") or cells[1].xpath(".//text()")
        label = note[0].strip() if note else ""

        # Amounts use a decimal comma on the page.
        amount = float((cells[2].text or "").strip().replace(",", "."))
        data.append((when, label, amount))
    return data
- # In[4]:
from io import StringIO, BytesIO
from lxml import etree
from datetime import datetime

# Merge the entries of all downloaded donation pages into one list, ordered
# by the natural tuple order (date first).
donations = sorted(
    entry
    for donate_html in y.values()
    for entry in parse_html_to_list(donate_html)
)

# In[5]:
expenses = parse_html_to_list(expense_html)

# In[6]:
# Total expenses.
sum(entry[2] for entry in expenses)

# In[7]:
# Total donations.
sum(entry[2] for entry in donations)

# In[8]:
# Balance: total donations minus total expenses.
sum(entry[2] for entry in donations) - sum(entry[2] for entry in expenses)
- # Donations/month
- # In[9]:
from itertools import groupby
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, timedelta

# Render figures inline in the notebook. run_line_magic() is the supported
# replacement for the deprecated get_ipython().magic() call.
get_ipython().run_line_magic('matplotlib', 'inline')
def _groupy(item):
    """Return the ``(year, month)`` grouping key of an entry whose first field is a date."""
    return item[0].year, item[0].month


def get_date_grouped_list(list_):
    """Aggregate ``(date, note, amount)`` entries into monthly totals.

    Args:
        list_: iterable of tuples whose first element has ``year`` and
            ``month`` attributes and whose third element is the amount.
            The entries may be in any order.

    Returns:
        A list of ``(date, total)`` tuples, one per month that occurs in the
        input, in chronological order. Each date is the first day of the
        month and ``total`` is the sum of that month's amounts.
    """
    # groupby() only merges *adjacent* items with equal keys, so the input is
    # ordered by the grouping key first; otherwise an unsorted list would
    # yield several partial entries for the same month.
    ordered = sorted(list_, key=_groupy)
    return [
        (date(year, month, 1), sum(entry[2] for entry in entries))
        for (year, month), entries in groupby(ordered, _groupy)
    ]
# Pad the first donation year with zero-valued entries for January up to the
# month before the first donation, so both monthly series start in January.
if donations:
    first_day = donations[0][0]
    donations_prefix = [
        (date(first_day.year, month, 1), '', 0)
        for month in range(1, first_day.month)
    ]
else:
    donations_prefix = []

agg_donations = get_date_grouped_list(donations_prefix + donations)
agg_expenses = get_date_grouped_list(donations_prefix + expenses)
agg_donations.sort()
agg_expenses.sort()

plt.figure(figsize=(15,7))

# donations
plt.plot([x[0] for x in agg_donations], [x[1] for x in agg_donations], color="green", label="Spenden")

# expenses
plt.plot([x[0] for x in agg_expenses], [x[1] for x in agg_expenses], color="red", label="Ausgaben")

# Running account balance ("piggy bank").
# The balance is accumulated over the chronologically sorted union of all
# months that have a donation or an expense. Walking only the expense months
# would drop donations from months without expenses, and handling a month out
# of order would assign it the wrong running total.
d = dict(agg_donations)
expense_totals = {}
for k, v in agg_expenses:
    # Accumulate instead of assigning so repeated months are not lost.
    expense_totals[k] = expense_totals.get(k, 0) + v

cash_available = []
cash = 0
for k in sorted(set(d) | set(expense_totals)):
    cash = cash - expense_totals.get(k, 0) + d.get(k, 0)
    cash_available.append((k, cash))

plt.plot([x[0] for x in cash_available], [x[1] for x in cash_available], color="blue", label="Spendenkonto")
plt.xlabel("Datum")
plt.ylabel("Euro")
plt.legend()
- # In[10]:
# Monthly totals as semicolon-separated CSV lines: date;amount
# NOTE(review): the original loop iterated over `agg_plot`, which is not
# defined anywhere in the visible code and raises NameError. `agg_donations`
# has the matching (date, amount) shape and is assumed to be the intended
# series -- confirm.
for x in agg_donations:
    print("%s;%.2f" % (x[0].strftime("%Y-%m-%d"), x[1]))

# In[ ]:
# Individual donations as semicolon-separated CSV lines: date;"note";amount
for x in donations:
    # Double any quote character inside the note so the quoted field stays
    # well-formed.
    print("%s;\"%s\";%.2f" % (x[0].strftime("%Y-%m-%d"), x[1].replace('"', '""'), x[2]))
Quellcode
Hier kannst du den Code kopieren und ihn in deinen bevorzugten Editor einfügen.