rc3-html-infobeamer/fahrplan.py

153 lines
5.8 KiB
Python
Raw Normal View History

2020-12-22 19:50:45 +01:00
import datetime
import json
import os
import sys
import re
import glob
import shutil
import errno
import subprocess
from lxml import etree
from urllib.request import urlopen
2021-12-21 22:37:19 +01:00
# URL of the schedule XML; passed to events() when this file is run as a script.
scheduleUrl = 'http://data.c3voc.de/rC3_21/everything.schedule.xml'
2020-12-22 19:50:45 +01:00
# Module-level cache for the parsed schedule; populated on first use by getSchedule().
scheduleTree=None
def downloadSchedule(scheduleUrl):
    """Download the schedule XML and return its parsed root element.

    Args:
        scheduleUrl: URL of the schedule XML document.

    Returns:
        The root element returned by ``etree.fromstring``.

    Raises:
        urllib.error.URLError: If the download fails.
        lxml.etree.XMLSyntaxError: If the response is not well-formed XML.
    """
    print("downloading schedule")
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(scheduleUrl) as response:
        xml = response.read()
    # huge_tree disables lxml's safety limits on tree depth and text size.
    parser = etree.XMLParser(huge_tree=True)
    return etree.fromstring(xml, parser)
def getSchedule(scheduleUrl):
    """Return the parsed schedule, downloading it only on first use.

    The result is cached in the module-level ``scheduleTree``. The cache is
    not keyed by URL: once populated, the same tree is returned regardless of
    the ``scheduleUrl`` passed in.

    Args:
        scheduleUrl: URL handed to ``downloadSchedule`` when the cache is empty.

    Returns:
        The cached root element of the schedule.
    """
    global scheduleTree
    # Compare against None explicitly: the truth value of an lxml element
    # reflects whether it has children (and truth-testing is deprecated), so a
    # childless root would otherwise trigger a new download on every call.
    if scheduleTree is None:
        scheduleTree = downloadSchedule(scheduleUrl)
    return scheduleTree
def persons(scheduleUrl, personmap=None, taglinemap=None, forEventId=None):
    """Yield the persons listed on events of the schedule.

    A person's text of the form ``"Name (tagline)"`` is split into the name
    and the text inside the first pair of parentheses. Each person id is
    yielded at most once per event.

    Note: the event filter previously tested ``event != None`` (always true),
    so with the default ``forEventId=None`` every event was skipped and
    nothing was yielded. The filter now only applies when ``forEventId`` is
    given.

    Args:
        scheduleUrl: URL passed to ``getSchedule``.
        personmap: Optional mapping of person id to a name that overrides the
            name taken from the schedule.
        taglinemap: Optional mapping of person id to a tagline that overrides
            the tagline taken from the schedule.
        forEventId: If not None, only persons of the event with this integer
            id are yielded; otherwise persons of all events are yielded.

    Yields:
        dict with the keys ``'id'`` (int), ``'person'`` (name) and
        ``'tagline'`` (possibly an empty string).
    """
    personmap = {} if personmap is None else personmap
    taglinemap = {} if taglinemap is None else taglinemap

    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                if forEventId is not None and int(event.get("id")) != forEventId:
                    continue

                persons_node = event.find('persons')
                if persons_node is None:
                    continue

                # Ids already yielded for this event.
                seen_ids = set()
                for person_node in persons_node.iter('person'):
                    person_id = int(person_node.get("id"))
                    if person_id in seen_ids:
                        continue
                    seen_ids.add(person_id)

                    # An empty <person/> element has text None; treat as ''.
                    name = re.sub(r'\s+', ' ', person_node.text or '').strip()
                    tagline = ''
                    match = re.search(r'\((.*?)\)', name)
                    if match is not None:
                        tagline = match.group(1)
                        name = name.split(" (")[0]

                    # Explicit overrides win over values from the schedule.
                    tagline = taglinemap.get(person_id, tagline)
                    name = personmap.get(person_id, name)

                    yield {
                        'id': person_id,
                        'person': name,
                        'tagline': tagline,
                    }
def _normalized_child_text(parent, tag):
    """Return the whitespace-collapsed text of child ``tag``, or '' if absent."""
    child = parent.find(tag)
    if child is None or child.text is None:
        return ''
    return re.sub(r'\s+', ' ', child.text).strip()


def _raw_child_text(parent, tag):
    """Return the unmodified text of child ``tag``, or None if the child is missing."""
    child = parent.find(tag)
    return None if child is None else child.text


def events(scheduleUrl, titlemap=None):
    """Yield one dict per event found in the schedule.

    Args:
        scheduleUrl: URL passed to ``getSchedule``.
        titlemap: Optional mapping of event id (int) to a title that
            overrides the title taken from the schedule.

    Yields:
        dict with the keys ``'day'``, ``'id'``, ``'title'``, ``'subtitle'``,
        ``'personnames'`` (comma-separated names), ``'room'``, ``'track'``,
        ``'start'``, ``'datetime'``, ``'duration'`` (minutes as int, or None
        when the duration is missing or contains no ``':'``) and
        ``'roomguid'`` ('' when the room has no ``guid`` attribute).
        ``'track'``, ``'start'`` and ``'datetime'`` carry the raw element
        text and are None when the element is missing or empty.

    Raises:
        KeyError: If a room element has no ``name`` attribute.
    """
    titlemap = {} if titlemap is None else titlemap

    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # Collect the names of the persons holding this talk.
                persons_node = event.find('persons')
                personnames = []
                if persons_node is not None:
                    personnames = [
                        # An empty <person/> element has text None; treat as ''.
                        re.sub(r'\s+', ' ', person.text or '').strip()
                        for person in persons_node.iter('person')
                    ]

                event_id = int(event.get('id'))
                if event_id in titlemap:
                    title = titlemap[event_id]
                else:
                    title = _normalized_child_text(event, 'title')
                subtitle = _normalized_child_text(event, 'subtitle')

                # The duration text is split at the first ':' into hours and
                # minutes and converted to total minutes.
                duration_text = _raw_child_text(event, 'duration')
                if duration_text is not None and ':' in duration_text:
                    hours, minutes = duration_text.split(':', 1)
                    duration = int(hours) * 60 + int(minutes)
                else:
                    duration = None

                yield {
                    'day': day.get('index'),
                    'id': event_id,
                    'title': title,
                    'subtitle': subtitle,
                    'personnames': ', '.join(personnames),
                    'room': room.attrib['name'],
                    'track': _raw_child_text(event, 'track'),
                    'start': _raw_child_text(event, 'start'),
                    'datetime': _raw_child_text(event, 'date'),
                    'duration': duration,
                    'roomguid': room.get('guid', ''),
                }
def main():
    """Write the upcoming events of the schedule to ``public/fahrplan.json``.

    Events are sorted by their start datetime. An event is kept while the
    current time is before both its start plus a 30 minute grace period and
    its end. Events without a known duration are kept until the grace period
    has passed.
    """
    # Use a distinct name so the module-level events() function is not rebound.
    schedule_events = list(events(scheduleUrl))

    # FIX: Some events have an incorrectly specified timezone (UTC instead of CET)
    for ev in schedule_events:
        if ev['datetime'].endswith("+00:00"):
            ev['datetime'] = ev['datetime'].replace("+00:00", "+01:00")
            print(f"Warning: Assuming CET instead of UTC for event '{ev['title']}': {ev['datetime']}")

    schedule_events.sort(key=lambda ev: datetime.datetime.fromisoformat(ev["datetime"]))

    # Filter already passed events
    # (There is a grace period of 30 mins after start for events longer than that)
    grace = datetime.timedelta(minutes=30)
    # Timezone-aware "now", taken once so all events are compared against the
    # same instant.
    now = datetime.datetime.now().astimezone()

    def is_upcoming(ev):
        ev_date = datetime.datetime.fromisoformat(ev["datetime"])
        if ev_date + grace <= now:
            return False
        # events() yields duration None when it could not be parsed; such
        # events are judged by the grace period alone instead of crashing.
        if ev["duration"] is None:
            return True
        return ev_date + datetime.timedelta(minutes=ev["duration"]) > now

    upcoming = [ev for ev in schedule_events if is_upcoming(ev)]

    with open("public/fahrplan.json", "w") as f:
        json.dump(upcoming, f)


if __name__ == "__main__":
    main()
2020-12-22 19:50:45 +01:00