NoPaste

Spendenstatistiken

von TRex

SNIPPET_TEXT:
  1. # coding: utf-8
  2.  
  3. # In[1]:
  4.  
  5. years = range(2003, 2016)
  6. donations_url = "https://wiki.debianforum.de/Debianforum.de/Spenden"
  7. expenses_url = "https://wiki.debianforum.de/Debianforum.de/Ausgaben"
  8.  
  9.  
  10. # In[2]:
  11.  
  12. import urllib
  13. urls = {year: (donations_url + '/' + str(year) if year < years[-1] else donations_url) for year in years}
  14.  
  15. y = dict()
  16. for year, url in urls.items():
  17.     f = urllib.urlopen(url)
  18.     y[year] = f.read()
  19.    
  20. expense_html = urllib.urlopen(expenses_url).read()
  21.  
  22.  
  23. # In[3]:
  24.  
  25. def parse_html_to_list(html):
  26.     """
  27.    Parse HTML table to list of tuples, expecting three cells with date, note, amount in each row.
  28.    """
  29.     data = []
  30.     parser = etree.HTMLParser()
  31.     tree = etree.parse(StringIO(html.decode('utf-8')), parser)
  32.     rows = tree.xpath('//table//tr')
  33.     for index, row in enumerate(rows):
  34.         cells = row.getchildren()
  35.         if len(cells) == 3 and index > 0:
  36.             date = cells[0].text.strip()
  37.             try:
  38.                 date = datetime.strptime(date, "%d.%m.%Y")
  39.             except Exception as e:
  40.                 continue
  41.             note = cells[1].xpath(".//a/text()")
  42.             if not note:
  43.                 note = cells[1].xpath(".//text()")
  44.             amount = float(cells[2].text.strip().replace(",","."))
  45.  
  46.             data.append((date, note[0].strip(), amount))
  47.     return data
  48.    
  49.  
  50.  
  51. # In[4]:
  52.  
  53. from io import StringIO, BytesIO
  54. from lxml import etree
  55. from datetime import datetime
  56.  
  57.  
  58. donations = []
  59.  
  60. for year, donate_html in y.items():
  61.     yearly_list = parse_html_to_list(donate_html)
  62.     donations += yearly_list
  63. donations.sort()
  64.  
  65.  
  66.  
  67. # In[5]:
  68.  
  69. expenses = parse_html_to_list(expense_html)
  70.  
  71.  
  72. # In[6]:
  73.  
  74. sum([x[2] for x in expenses])
  75.  
  76.  
  77. # In[7]:
  78.  
  79. sum([x[2] for x in donations])
  80.  
  81.  
  82. # In[8]:
  83.  
  84. sum([x[2] for x in donations]) - sum([x[2] for x in expenses])
  85.  
  86.  
  87. # Donations/month
  88.  
  89. # In[9]:
  90.  
  91. from itertools import groupby
  92. import numpy as np
  93. import matplotlib.pyplot as plt
  94. from datetime import date, timedelta
  95. get_ipython().magic(u'matplotlib inline')
  96.  
  97. def _groupy(item):
  98.     return item[0].year, item[0].month
  99.  
  100. def get_date_grouped_list(list_):
  101.     agg_list = []
  102.     for ( (year, month), items ) in groupby( list_, _groupy ):
  103.         agg_list.append( ( date(year, month, 1), sum([x[2] for x in items])) )
  104.     return agg_list
  105.  
  106. donations_prefix = [(date(donations[0][0].year, x+1, 1), '', 0) for x in range(0, donations[0][0].month-1)] # cheating. Now even better.
  107. agg_donations = get_date_grouped_list(donations_prefix + donations)
  108. agg_expenses = get_date_grouped_list(donations_prefix + expenses)
  109.  
  110. agg_donations.sort()
  111. agg_expenses.sort()
  112.  
  113. plt.figure(figsize=(15,7))
  114. # donations
  115. plt.plot([x[0] for x in agg_donations], [x[1] for x in agg_donations], color="green", label="Spenden")
  116. # expenses
  117. plt.plot([x[0] for x in agg_expenses], [x[1] for x in agg_expenses], color="red", label="Ausgaben")
  118.  
  119. # spartopf
  120. cash_available = []
  121. cash = 0
  122. d = dict(agg_donations)
  123. # shitty workaround: this month is missing. probably a bug in the source table.
  124. for k,v in agg_expenses + [(date(2009, 3, 1), 0)]:
  125.     donation = d.pop(k, 0)
  126.     cash = cash - v + donation
  127.    
  128.     cash_available.append((k, cash))
  129. cash_available.sort()
  130.  
  131. plt.plot([x[0] for x in cash_available], [x[1] for x in cash_available], color="blue", label="Spendenkonto")
  132. plt.xlabel("Datum")
  133. plt.ylabel("Euro")
  134. plt.legend()
  135.  
  136.  
  137.  
  138. # In[10]:
  139.  
  140. # csv thing
  141. for x in agg_plot:
  142.     print "%s;%.2f" % (x[0].strftime("%Y-%m-%d"), x[1])
  143.  
  144.  
  145. # In[ ]:
  146.  
  147. # csv thing
  148. for x in donations:
  149.     print "%s;\"%s\";%.2f" % (x[0].strftime("%Y-%m-%d"), x[1], x[2])

Quellcode

Hier kannst du den Code kopieren und ihn in deinen bevorzugten Editor einfügen. PASTEBIN_DOWNLOAD_SNIPPET_EXPLAIN