2020-12-22 19:50:45 +01:00
|
|
|
import datetime
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import re
|
|
|
|
import glob
|
|
|
|
import shutil
|
|
|
|
import errno
|
|
|
|
import subprocess
|
|
|
|
from lxml import etree
|
|
|
|
from urllib.request import urlopen
|
|
|
|
|
|
|
|
# URL of the schedule XML; passed to events() when this file is run as a script.
scheduleUrl = 'http://data.c3voc.de/rC3/everything.schedule.xml'
|
|
|
|
|
|
|
|
# Module-level cache for the parsed schedule; assigned by getSchedule().
scheduleTree=None
|
|
|
|
|
|
|
|
# Download the schedule XML and parse it into an element tree.
def downloadSchedule(scheduleUrl):
    """Fetch the schedule document at *scheduleUrl* and return its parsed root.

    Args:
        scheduleUrl: URL of the schedule XML; handed to ``urlopen`` as-is.

    Returns:
        The root element returned by ``etree.fromstring``.

    Raises:
        Any exception raised by ``urlopen``, by reading the response, or by
        the XML parser propagates unchanged; nothing is caught here.
    """
    print("downloading schedule")

    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read, even if reading fails.
    with urlopen(scheduleUrl) as response:
        xml = response.read()

    # huge_tree=True relaxes lxml's built-in size/depth limits so that a very
    # large combined schedule can still be parsed.
    parser = etree.XMLParser(huge_tree=True)
    return etree.fromstring(xml, parser)
|
|
|
|
|
|
|
|
def getSchedule(scheduleUrl):
    """Return the parsed schedule, downloading it only on the first call.

    The result is cached in the module-level ``scheduleTree``. Once the cache
    is filled, later calls return the cached tree and ignore *scheduleUrl*.

    Args:
        scheduleUrl: URL passed to ``downloadSchedule`` when the cache is
            still empty.

    Returns:
        The cached schedule tree.
    """
    global scheduleTree

    # Compare against None explicitly. The truth value of an element reflects
    # whether it has children, so "not scheduleTree" would treat a childless
    # root as "not cached" and download the schedule again on every call.
    if scheduleTree is None:
        scheduleTree = downloadSchedule(scheduleUrl)
    return scheduleTree
|
|
|
|
|
|
|
|
def persons(scheduleUrl, personmap=None, taglinemap=None, forEventId=None):
    """Yield the distinct persons listed for the event with id *forEventId*.

    A person's text of the form ``"Name (tagline)"`` is split into a name and
    a tagline; text without parentheses yields an empty tagline. Each person
    id is yielded at most once per event.

    Args:
        scheduleUrl: URL handed to ``getSchedule``.
        personmap: Optional mapping of person id -> name that overrides the
            name found in the schedule.
        taglinemap: Optional mapping of person id -> tagline that overrides
            the tagline parsed from the schedule.
        forEventId: Integer id of the event whose persons are wanted.

    Yields:
        Dicts with the keys ``'id'``, ``'person'`` and ``'tagline'``.
    """
    personmap = {} if personmap is None else personmap
    taglinemap = {} if taglinemap is None else taglinemap

    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # NOTE(review): with the default forEventId=None no event id
                # ever matches, so nothing is yielded. This mirrors the
                # previous behaviour; confirm whether None was meant to
                # disable the filter instead.
                if int(event.get("id")) != forEventId:
                    continue

                persons_el = event.find('persons')
                if persons_el is None:
                    continue

                seen_ids = set()
                for person_el in persons_el.iter('person'):
                    person_id = int(person_el.get("id"))
                    if person_id in seen_ids:
                        continue
                    seen_ids.add(person_id)

                    # An empty <person/> element has .text set to None; treat
                    # it as an empty name instead of crashing in re.sub.
                    name = re.sub(r'\s+', ' ', person_el.text or '').strip()

                    tagline = ''
                    match = re.search(r'\((.*?)\)', name)
                    if match is not None:
                        tagline = match.group(1)
                        name = name.split(" (")[0]

                    # Explicit overrides win over what the schedule says.
                    tagline = taglinemap.get(person_id, tagline)
                    name = personmap.get(person_id, name)

                    yield {
                        'id': person_id,
                        'person': name,
                        'tagline': tagline,
                    }
|
|
|
|
|
|
|
|
def _child_text(element, tag):
    """Return the text of the first *tag* child, or None if it is missing or empty."""
    child = element.find(tag)
    return None if child is None else child.text


def _collapse_whitespace(text):
    """Replace every whitespace run in *text* with one space and strip the ends."""
    return re.sub(r'\s+', ' ', text).strip()


def _duration_minutes(text):
    """Convert an ``"HH:MM"`` string to minutes; return None when there is no colon."""
    if text is None or ':' not in text:
        return None
    hours, minutes = text.split(':', 1)
    return int(hours) * 60 + int(minutes)


def events(scheduleUrl, titlemap=None):
    """Yield one dict per event found in the schedule.

    Args:
        scheduleUrl: URL handed to ``getSchedule``.
        titlemap: Optional mapping of event id -> title that overrides the
            title found in the schedule.

    Yields:
        Dicts with the keys ``'day'``, ``'id'``, ``'title'``, ``'subtitle'``,
        ``'personnames'``, ``'room'``, ``'track'``, ``'start'``,
        ``'datetime'``, ``'duration'`` and ``'roomguid'``. ``'duration'`` is
        in minutes, or None when the schedule gives no ``HH:MM`` value.
        ``'track'``, ``'start'`` and ``'datetime'`` are None when the
        corresponding child element is missing or empty.
    """
    titlemap = {} if titlemap is None else titlemap

    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # Collect the names of the persons holding this talk; a
                # <person/> element without text is skipped.
                personnames = []
                persons_el = event.find('persons')
                if persons_el is not None:
                    for person_el in persons_el.iter('person'):
                        if person_el.text is not None:
                            personnames.append(_collapse_whitespace(person_el.text))

                event_id = int(event.get('id'))

                if event_id in titlemap:
                    title = titlemap[event_id]
                else:
                    title = _collapse_whitespace(_child_text(event, 'title') or '')

                subtitle = _collapse_whitespace(_child_text(event, 'subtitle') or '')

                yield {
                    'day': day.get('index'),
                    'id': event_id,
                    'title': title,
                    'subtitle': subtitle,
                    'personnames': ', '.join(personnames),
                    'room': room.attrib['name'],
                    'track': _child_text(event, 'track'),
                    'start': _child_text(event, 'start'),
                    'datetime': _child_text(event, 'date'),
                    'duration': _duration_minutes(_child_text(event, 'duration')),
                    'roomguid': room.get('guid', ''),
                }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Materialize the generator under its own name so the events() function
    # is not overwritten by its result.
    schedule_events = list(events(scheduleUrl))

    # FIX: Some events have an incorrectly specified timezone (UTC instead of CET)
    utc_suffix = "+00:00"
    for ev in schedule_events:
        if ev['datetime'].endswith(utc_suffix):
            # Rewrite only the trailing offset that was just checked.
            ev['datetime'] = ev['datetime'][:-len(utc_suffix)] + "+01:00"
            print(f"Warning: Assuming CET instead of UTC for event '{ev['title']}': {ev['datetime']}")

    schedule_events.sort(key=lambda ev: datetime.datetime.fromisoformat(ev["datetime"]))

    # Filter already passed events
    # (There is a grace period of 30 mins after start for events longer than that)
    grace = datetime.timedelta(minutes=30)
    now = datetime.datetime.now().astimezone()

    def is_upcoming(ev, now=now):
        """Return True while *ev* is within the grace period and has not ended."""
        ev_date = datetime.datetime.fromisoformat(ev["datetime"])
        if ev_date + grace <= now:
            return False
        # events() reports a duration of None when the schedule has no usable
        # value; in that case only the grace period decides.
        if ev["duration"] is None:
            return True
        return ev_date + datetime.timedelta(minutes=ev["duration"]) > now

    upcoming_events = [ev for ev in schedule_events if is_upcoming(ev)]

    with open("fahrplan.json", "w") as f:
        json.dump(upcoming_events, f)
|
2020-12-22 19:50:45 +01:00
|
|
|
|
|
|
|
|