131 lines
4.8 KiB
Python
131 lines
4.8 KiB
Python
|
import datetime
|
||
|
import json
|
||
|
import os
|
||
|
import sys
|
||
|
import re
|
||
|
import glob
|
||
|
import shutil
|
||
|
import errno
|
||
|
import subprocess
|
||
|
from lxml import etree
|
||
|
from urllib.request import urlopen
|
||
|
|
||
|
# Schedule XML location used when this module is run as a script.
scheduleUrl = 'http://data.c3voc.de/rC3/everything.schedule.xml'

# Module-level cache for the parsed schedule tree; None until getSchedule()
# has stored a downloaded tree here.
scheduleTree = None
|
||
|
|
||
|
# Download the event schedule and parse it into an XML element tree.
|
||
|
def downloadSchedule(scheduleUrl):
    """Download the schedule XML and parse it into an lxml element tree.

    Args:
        scheduleUrl: URL the schedule XML is fetched from.

    Returns:
        The root element returned by ``etree.fromstring``.
    """
    print("downloading schedule")

    # Use the response as a context manager so the connection is closed
    # even when reading the body raises.
    with urlopen(scheduleUrl) as response:
        xml = response.read()

    # huge_tree=True relaxes lxml's parser limits on tree depth and text
    # size, so large schedule documents can be parsed.
    parser = etree.XMLParser(huge_tree=True)
    return etree.fromstring(xml, parser)
|
||
|
|
||
|
def getSchedule(scheduleUrl):
    """Return the parsed schedule tree, downloading it on first use.

    The tree is cached in the module-level ``scheduleTree``. The cache is
    not keyed by URL: once a tree has been stored, it is returned for every
    later call regardless of ``scheduleUrl``.

    Args:
        scheduleUrl: URL passed to ``downloadSchedule`` when nothing is
            cached yet.

    Returns:
        The cached (or freshly downloaded) schedule root element.
    """
    global scheduleTree
    # Compare against None explicitly: the truth value of an lxml element
    # reflects whether it has children, so ``not scheduleTree`` would treat
    # a childless root as "not downloaded yet" and fetch it again.
    if scheduleTree is None:
        scheduleTree = downloadSchedule(scheduleUrl)
    return scheduleTree
|
||
|
|
||
|
def persons(scheduleUrl, personmap={}, taglinemap={}, forEventId=None):
    """Yield one dict per distinct person of each selected event.

    A person's text may carry a tagline in parentheses ("Name (tagline)");
    the tagline is split off the name. Entries in ``personmap`` and
    ``taglinemap`` take precedence over the values found in the schedule.
    Persons are de-duplicated per event, not across events.

    Args:
        scheduleUrl: URL handed to ``getSchedule``.
        personmap: Mapping of person id -> name override. Only read here.
        taglinemap: Mapping of person id -> tagline override. Only read here.
        forEventId: Only persons of the event with this id are yielded;
            ``None`` selects every event.

    Yields:
        dict with the keys ``'id'``, ``'person'`` and ``'tagline'``.
    """
    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                eventid = int(event.get("id"))
                # Filter only when an event id was requested. The previous
                # check tested ``event`` instead of ``forEventId``, so the
                # default of None skipped every event.
                if forEventId is not None and eventid != forEventId:
                    continue

                persons_node = event.find('persons')
                if persons_node is None:
                    continue

                # ids already yielded for this event
                seen_ids = set()
                for person_node in persons_node.iter('person'):
                    person_id = int(person_node.get("id"))
                    # .text is None for an empty <person/> element
                    name = re.sub(r'\s+', ' ', person_node.text or '').strip()

                    tagline = ''
                    match = re.search(r'\((.*?)\)', name)
                    if match is not None:
                        tagline = match.group(1)
                        name = name.split(" (")[0]

                    # explicit overrides win over the schedule contents
                    if person_id in taglinemap:
                        tagline = taglinemap[person_id]
                    if person_id in personmap:
                        name = personmap[person_id]

                    if person_id in seen_ids:
                        continue
                    seen_ids.add(person_id)
                    yield {
                        'id': person_id,
                        'person': name,
                        'tagline': tagline,
                    }
|
||
|
|
||
|
def events(scheduleUrl, titlemap={}):
    """Yield one dict per event found in the schedule.

    Args:
        scheduleUrl: URL handed to ``getSchedule``.
        titlemap: Mapping of event id -> title override. Only read here.

    Yields:
        dict with the keys ``'day'``, ``'id'``, ``'title'``, ``'subtitle'``,
        ``'personnames'`` (comma-separated), ``'room'``, ``'track'``,
        ``'start'``, ``'datetime'`` and ``'roomguid'``.

    Raises:
        AttributeError: if an event has no ``track`` or ``date`` child.
        KeyError: if a room has no ``name`` attribute.
    """
    schedule = getSchedule(scheduleUrl)
    for day in schedule.iter('day'):
        for room in day.iter('room'):
            for event in room.iter('event'):
                # collect the names of the persons holding this talk
                personnames = []
                persons_node = event.find('persons')
                if persons_node is not None:
                    for person in persons_node.iter('person'):
                        # .text is None for an empty <person/> element
                        personnames.append(
                            re.sub(r'\s+', ' ', person.text or '').strip())

                event_id = int(event.get('id'))

                # an explicit override wins over the title in the schedule
                if event_id in titlemap:
                    title = titlemap[event_id]
                else:
                    title = _normalized_child_text(event, 'title')

                yield {
                    'day': day.get('index'),
                    'id': event_id,
                    'title': title,
                    'subtitle': _normalized_child_text(event, 'subtitle'),
                    'personnames': ', '.join(personnames),
                    'room': room.attrib['name'],
                    'track': event.find('track').text,
                    # use the None-safe, normalized value; it used to be
                    # computed and then ignored in favour of the raw text
                    'start': _normalized_child_text(event, 'start'),
                    'datetime': event.find('date').text,
                    'roomguid': room.attrib.get('guid', ''),
                }


def _normalized_child_text(parent, tag):
    """Return the whitespace-normalized text of child ``tag``, or '' when
    the child or its text is missing."""
    node = parent.find(tag)
    if node is None or node.text is None:
        return ''
    return re.sub(r'\s+', ' ', node.text).strip()
|
||
|
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: dump all events, ordered by their start
    # timestamp, to fahrplan.json in the current working directory.
    # A distinct variable name keeps the events() function from being
    # rebound to a list; sorted() consumes the generator directly.
    sorted_events = sorted(
        events(scheduleUrl),
        key=lambda x: datetime.datetime.strptime(
            x["datetime"], "%Y-%m-%dT%H:%M:%S%z"),
    )

    with open("fahrplan.json", "w") as f:
        json.dump(sorted_events, f)
|
||
|
|
||
|
|