rc3-html-infobeamer/fahrplan.py

131 lines
4.8 KiB
Python

import datetime
import json
import os
import sys
import re
import glob
import shutil
import errno
import subprocess
from lxml import etree
from urllib.request import urlopen
# URL of the schedule XML that this module downloads and parses.
# NOTE(review): this is a plain-http URL - confirm whether an https endpoint should be used.
scheduleUrl = 'http://data.c3voc.de/rC3/everything.schedule.xml'
# Module-level cache for the parsed schedule tree; getSchedule() fills it on first use.
scheduleTree=None
# Download the event schedule and parse it into an element tree.
def downloadSchedule(scheduleUrl):
    """Download the schedule XML and return its parsed root element.

    Args:
        scheduleUrl: URL handed to ``urlopen``.

    Returns:
        The root element produced by ``etree.fromstring`` for the
        downloaded document.
    """
    print("downloading schedule")
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, even if reading fails.
    with urlopen(scheduleUrl) as response:
        xml = response.read()
    # huge_tree=True is passed through to lxml's parser; per the lxml docs
    # it lifts the parser's size/depth restrictions for large documents.
    parser = etree.XMLParser(huge_tree=True)
    return etree.fromstring(xml, parser)
def getSchedule(scheduleUrl):
    """Return the parsed schedule, downloading it on first use.

    The result is cached in the module-level ``scheduleTree``. Once the
    cache is filled, later calls return it regardless of the
    ``scheduleUrl`` they pass.

    Args:
        scheduleUrl: URL forwarded to ``downloadSchedule`` when the cache
            is still empty.

    Returns:
        The cached schedule tree.
    """
    global scheduleTree
    # Compare with None explicitly: an element without children is falsy,
    # so a truthiness test could trigger a needless re-download.
    if scheduleTree is None:
        scheduleTree = downloadSchedule(scheduleUrl)
    return scheduleTree
def persons(scheduleUrl, personmap=None, taglinemap=None, forEventId=None):
    """Yield the persons listed in the schedule, one dict per person.

    Args:
        scheduleUrl: URL forwarded to ``getSchedule``.
        personmap: Optional mapping of person id -> display name that
            overrides the name found in the schedule.
        taglinemap: Optional mapping of person id -> tagline that
            overrides the tagline parsed from the name.
        forEventId: When given, only persons of the event with this id
            are yielded. When ``None``, persons of every event are yielded.

    Yields:
        Dicts with the keys ``id``, ``person`` and ``tagline``. A person is
        yielded at most once per event.
    """
    personmap = {} if personmap is None else personmap
    taglinemap = {} if taglinemap is None else taglinemap

    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                eventid = int(event.get("id"))
                # Filter only when a specific event was requested; the
                # previous check tested ``event`` instead of ``forEventId``
                # and therefore skipped everything for the default None.
                if forEventId is not None and eventid != forEventId:
                    continue

                persons_element = event.find('persons')
                if persons_element is None:
                    continue

                # Ids already yielded for this event.
                persons_seen = set()
                for person_element in persons_element.iter('person'):
                    person_id = int(person_element.get("id"))
                    if person_id in persons_seen:
                        continue
                    persons_seen.add(person_id)

                    # An empty <person/> has text None; treat it as ''.
                    name = re.sub(r'\s+', ' ', person_element.text or '').strip()

                    # A parenthesised part of the name is used as tagline,
                    # e.g. "Jane Doe (ACME)" -> name "Jane Doe", tagline "ACME".
                    tagline = ''
                    match = re.search(r'\((.*?)\)', name)
                    if match is not None:
                        tagline = match.group(1)
                        name = name.split(" (")[0]

                    # Explicit overrides win over values parsed from the XML.
                    if person_id in taglinemap:
                        tagline = taglinemap[person_id]
                    if person_id in personmap:
                        name = personmap[person_id]

                    yield {
                        'id': person_id,
                        'person': name,
                        'tagline': tagline
                    }
def _normalized_child_text(element, tag):
    """Return the whitespace-collapsed text of child *tag*, or '' if absent/empty."""
    child = element.find(tag)
    if child is None or child.text is None:
        return ''
    return re.sub(r'\s+', ' ', child.text).strip()


def _raw_child_text(element, tag):
    """Return the unmodified text of child *tag*, or None if the child is missing."""
    child = element.find(tag)
    return None if child is None else child.text


def events(scheduleUrl, titlemap=None):
    """Yield one dict per event found in the schedule.

    Args:
        scheduleUrl: URL forwarded to ``getSchedule``.
        titlemap: Optional mapping of event id -> title that overrides the
            title found in the schedule.

    Yields:
        Dicts with the keys ``day``, ``id``, ``title``, ``subtitle``,
        ``personnames``, ``room``, ``track``, ``start``, ``datetime`` and
        ``roomguid``. ``title``, ``subtitle`` and ``start`` are
        whitespace-normalised and fall back to ``''``. ``track`` and
        ``datetime`` carry the raw element text, or ``None`` when the
        element is missing.
    """
    titlemap = {} if titlemap is None else titlemap

    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # Collect the names of the persons holding this talk.
                personnames = []
                persons_element = event.find('persons')
                if persons_element is not None:
                    personnames = [
                        # An empty <person/> has text None; treat it as ''.
                        re.sub(r'\s+', ' ', person.text or '').strip()
                        for person in persons_element.iter('person')
                    ]

                event_id = int(event.get('id'))
                if event_id in titlemap:
                    title = titlemap[event_id]
                else:
                    title = _normalized_child_text(event, 'title')

                yield {
                    'day': day.get('index'),
                    'id': event_id,
                    'title': title,
                    'subtitle': _normalized_child_text(event, 'subtitle'),
                    'personnames': ', '.join(personnames),
                    'room': room.attrib['name'],
                    # Guarded lookups: a missing element yields None instead
                    # of raising AttributeError on ``.text``.
                    'track': _raw_child_text(event, 'track'),
                    'start': _normalized_child_text(event, 'start'),
                    'datetime': _raw_child_text(event, 'date'),
                    'roomguid': room.attrib['guid'] if 'guid' in room.attrib else '',
                }
if __name__ == "__main__":
    # Script entry point: dump all events, ordered by start time, to
    # fahrplan.json in the current working directory.
    # A separate name is used for the result so the events() generator
    # function is not rebound to a list.
    sorted_events = sorted(
        events(scheduleUrl),
        # The schedule's date values are parsed as ISO timestamps with a UTC offset.
        key=lambda entry: datetime.datetime.strptime(entry["datetime"], "%Y-%m-%dT%H:%M:%S%z"),
    )
    with open("fahrplan.json", "w", encoding="utf-8") as f:
        json.dump(sorted_events, f)