# rc3-html-infobeamer/fahrplan.py
#
# 153 lines
# 5.8 KiB
# Python

import datetime
import json
import os
import sys
import re
import glob
import shutil
import errno
import subprocess
from lxml import etree
from urllib.request import urlopen
# Location of the rC3 "everything" schedule XML on data.c3voc.de.
scheduleUrl = "http://data.c3voc.de/rC3/everything.schedule.xml"

# Module-level cache for the parsed schedule; stays None until
# getSchedule() has downloaded and parsed the document once.
scheduleTree = None
def downloadSchedule(scheduleUrl):
    """Download the schedule XML and return its parsed root element.

    Args:
        scheduleUrl: URL of the schedule XML document.

    Returns:
        The root element returned by ``etree.fromstring``.

    Raises:
        urllib.error.URLError: If the document cannot be fetched.
        lxml.etree.XMLSyntaxError: If the response body is not well-formed XML.
    """
    print("downloading schedule")

    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read, even if reading fails.
    with urlopen(scheduleUrl) as response:
        xml = response.read()

    # huge_tree lifts lxml's default size/depth limits so that a very large
    # schedule document can still be parsed.
    parser = etree.XMLParser(huge_tree=True)
    return etree.fromstring(xml, parser)
def getSchedule(scheduleUrl):
    """Return the parsed schedule, downloading it only on the first call.

    The result is cached in the module-level ``scheduleTree``. Once the cache
    is filled, ``scheduleUrl`` is ignored and the cached tree is returned.

    Args:
        scheduleUrl: URL handed to ``downloadSchedule`` when the cache is empty.

    Returns:
        The cached root element of the schedule.
    """
    global scheduleTree
    # Compare with None explicitly: the truth value of an lxml element
    # reflects whether it has children (and is deprecated), so ``not tree``
    # would treat a cached, childless root as "not downloaded yet".
    if scheduleTree is None:
        scheduleTree = downloadSchedule(scheduleUrl)
    return scheduleTree
def persons(scheduleUrl, personmap={}, taglinemap={}, forEventId=None):
    """Yield one dict per distinct person of the selected event(s).

    A tagline is taken from the first parenthesised part of the person's
    name ("Jane Doe (ACME)" -> name "Jane Doe", tagline "ACME"). Entries in
    ``personmap`` / ``taglinemap`` override the parsed values.

    Args:
        scheduleUrl: URL of the schedule XML (passed to ``getSchedule``).
        personmap: Mapping of person id -> display name override. Only read,
            never modified.
        taglinemap: Mapping of person id -> tagline override. Only read,
            never modified.
        forEventId: If given, only persons of the event with this integer id
            are yielded. If None, persons of every event are yielded.

    Yields:
        Dicts with the keys ``'id'`` (int), ``'person'`` (str) and
        ``'tagline'`` (str). Within one event each person id is yielded at
        most once.
    """
    schedule = getSchedule(scheduleUrl)

    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # Filter on the *requested* id. The previous check tested
                # ``event != None`` (always true), which skipped every event
                # whenever no forEventId was supplied.
                if forEventId is not None and int(event.get("id")) != forEventId:
                    continue

                persons_elem = event.find('persons')
                if persons_elem is None:
                    continue

                # Person ids already yielded for this event.
                seen_ids = set()
                for person_elem in persons_elem.iter('person'):
                    person_id = int(person_elem.get("id"))
                    if person_id in seen_ids:
                        continue
                    seen_ids.add(person_id)

                    # An empty <person/> has text None; treat it as ''.
                    name = re.sub(r'\s+', ' ', person_elem.text or '').strip()

                    tagline = ''
                    match = re.search(r'\((.*?)\)', name)
                    if match is not None:
                        tagline = match.group(1)
                        name = name.split(" (")[0]

                    # Explicit overrides win over whatever was parsed.
                    if person_id in taglinemap:
                        tagline = taglinemap[person_id]
                    if person_id in personmap:
                        name = personmap[person_id]

                    yield {
                        'id': person_id,
                        'person': name,
                        'tagline': tagline,
                    }
def _event_child_text(event, tag):
    """Return the raw text of the first ``tag`` child of ``event``, or None."""
    child = event.find(tag)
    return None if child is None else child.text


def _collapse_ws(text):
    """Collapse whitespace runs in ``text`` to single spaces; None becomes ''."""
    return re.sub(r'\s+', ' ', text or '').strip()


def events(scheduleUrl, titlemap={}):
    """Yield one dict per event found in the schedule.

    Args:
        scheduleUrl: URL of the schedule XML (passed to ``getSchedule``).
        titlemap: Mapping of event id -> title override. Only read, never
            modified.

    Yields:
        Dicts with the keys:
          * ``'day'``: value of the day's ``index`` attribute.
          * ``'id'``: event id as int.
          * ``'title'`` / ``'subtitle'``: whitespace-normalised text, '' if
            missing.
          * ``'personnames'``: person names joined with ``', '``.
          * ``'room'``: the room's ``name`` attribute.
          * ``'track'``: raw track text, None if missing.
          * ``'start'``: whitespace-normalised start text, '' if missing.
          * ``'datetime'``: raw text of the ``<date>`` element, None if missing.
          * ``'duration'``: duration in minutes parsed from ``H:MM``, or None
            if the element is missing or has no ``:``.
          * ``'roomguid'``: the room's ``guid`` attribute, '' if absent.
    """
    schedule = getSchedule(scheduleUrl)

    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # Names of the persons holding this talk.
                persons_elem = event.find('persons')
                if persons_elem is None:
                    personnames = []
                else:
                    personnames = [
                        _collapse_ws(person.text)
                        for person in persons_elem.iter('person')
                    ]

                event_id = int(event.get('id'))
                if event_id in titlemap:
                    title = titlemap[event_id]
                else:
                    title = _collapse_ws(_event_child_text(event, 'title'))
                subtitle = _collapse_ws(_event_child_text(event, 'subtitle'))
                start = _collapse_ws(_event_child_text(event, 'start'))

                # Convert "H:MM" into minutes. An empty or missing element
                # yields None instead of raising.
                duration_text = _event_child_text(event, 'duration')
                if duration_text and ':' in duration_text:
                    hours, minutes = duration_text.split(':', 1)
                    duration = int(hours) * 60 + int(minutes)
                else:
                    duration = None

                yield {
                    'day': day.get('index'),
                    'id': event_id,
                    'title': title,
                    'subtitle': subtitle,
                    'personnames': ', '.join(personnames),
                    'room': room.attrib['name'],
                    'track': _event_child_text(event, 'track'),
                    # Use the normalised value computed above; previously the
                    # element was dereferenced again, crashing when absent.
                    'start': start,
                    'datetime': _event_child_text(event, 'date'),
                    'duration': duration,
                    'roomguid': room.attrib.get('guid', ''),
                }
# Events stay listed for this long after their start (if they run that long).
GRACE_PERIOD = datetime.timedelta(minutes=30)


def is_upcoming(ev, now=None, grace=GRACE_PERIOD):
    """Return True if the event has not yet passed.

    An event counts as upcoming until ``grace`` after its start, or until
    its end if it is shorter than ``grace``.

    Args:
        ev: Event dict with an ISO-8601 ``'datetime'`` string and a
            ``'duration'`` in minutes (or None when unknown).
        now: Reference time; defaults to the current local time.
        grace: How long after its start an event is still listed.

    Returns:
        True if the event should still be listed, False otherwise.
    """
    if now is None:
        now = datetime.datetime.now().astimezone()

    ev_date = datetime.datetime.fromisoformat(ev["datetime"])
    if ev_date + grace <= now:
        return False

    # events() reports a missing duration as None; only the grace period can
    # be applied then (timedelta(minutes=None) would raise TypeError).
    duration = ev["duration"]
    if duration is None:
        return True
    return ev_date + datetime.timedelta(minutes=duration) > now


def main():
    """Write all upcoming events, sorted by start time, to fahrplan.json."""
    # Use a distinct local name so the events() function is not clobbered.
    schedule = []
    for ev in events(scheduleUrl):
        if not ev['datetime']:
            print(f"Warning: Skipping event '{ev['title']}' without a date")
            continue
        # FIX: Some events have an incorrectly specified timezone (UTC instead of CET)
        if ev['datetime'].endswith("+00:00"):
            ev['datetime'] = ev['datetime'][:-len("+00:00")] + "+01:00"
            print(f"Warning: Assuming CET instead of UTC for event '{ev['title']}': {ev['datetime']}")
        schedule.append(ev)

    schedule.sort(key=lambda ev: datetime.datetime.fromisoformat(ev["datetime"]))

    # Filter already passed events, judging all of them against one snapshot
    # of the current time.
    now = datetime.datetime.now().astimezone()
    upcoming = [ev for ev in schedule if is_upcoming(ev, now)]

    with open("fahrplan.json", "w") as f:
        json.dump(upcoming, f)


if __name__ == "__main__":
    main()