Skip to content

Commit 86da59c

Browse files
author
Piotr Mitros
committed
Nicer descriptions
1 parent 50fa878 commit 86da59c

File tree

3 files changed

+117
-54
lines changed

3 files changed

+117
-54
lines changed

helpers.py

+29-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
import json
23
import os,os.path
34
import re
@@ -229,21 +230,37 @@ def save_tree(basepath, tree):
229230

230231
yt_service = None
231232
def youtube_entry(video):
    ''' Look up a video on Youtube and return its metadata.

    `video` is either an XML <video> element (the Youtube id is read
    from its youtube_id_1_0 attribute) or a Youtube id passed
    directly as a string.

    Returns a dictionary with the keys title, duration (seconds, as
    a float), duration_str (pretty-printed duration), and
    description. Returns None if no Youtube id could be found.
    '''
    # The service object is created on first use and cached in the
    # module-level yt_service, so later calls reuse it.
    global yt_service
    if not yt_service:
        import gdata.youtube.service
        yt_service = gdata.youtube.service.YouTubeService()

    # TODO: Parse traditional XML entries.
    # Handle both XML <video> elements and straight-up Youtube IDs
    if not isinstance(video, basestring):
        video_id = video.attrib.get('youtube_id_1_0', None)
    else:
        video_id = video
    if not video_id:
        return

    entry = yt_service.GetYouTubeVideoEntry(video_id=video_id)
    # The title comes from media.title; media.duration only carries
    # the length in seconds.
    return {'title': entry.media.title.text,
            'duration': float(entry.media.duration.seconds),
            'duration_str': format_time_delta(entry.media.duration.seconds),
            'description' : entry.media.description.text}
246252

253+
def format_time_delta(time):
    ''' Pretty-print a time delta.

    `time` is a number of seconds (anything int() accepts, such as
    an int or a numeric string). The H:MM:SS form produced by
    datetime.timedelta has its leading zeros and colons removed, so
    225 gives "3:45" and 45 gives "45". A delta of zero gives "0".
    '''
    time_delta = str(datetime.timedelta(seconds = int(time)))
    # Strip the leading 0:0 from 0:03:45. lstrip stops by itself when
    # the string runs out; a character-by-character loop testing
    # `time_delta[:1] in "0:"` never ends on an empty string, because
    # "" is contained in every string.
    time_delta = time_delta.lstrip("0:")
    # A delta of zero strips down to nothing; show it as "0"
    if not time_delta:
        time_delta = "0"
    return time_delta
263+
247264
def propagate_youtube_information(tree):
248265
''' Retrieve information from Youtube. Use it to set
249266
display_names for videos.
@@ -263,3 +280,12 @@ def propagate_youtube_information(tree):
263280
e.attrib['display_name'] = "{title} ({duration})".format(title=vid_info['title'],
264281
duration=vid_info['duration_str'])
265282

283+
def format_file_size(num):
    ''' Turn a byte count into a short human-readable string.

    The value is divided by 1024 until its magnitude drops below
    1024, and is printed with one decimal place followed directly by
    the unit (bytes, KB, MB, GB). Anything still too large after GB
    is reported in TB.

    http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
    '''
    size = num
    for unit in ('bytes', 'KB', 'MB', 'GB'):
        if -1024.0 < size < 1024.0:
            break
        size = size / 1024.0
    else:
        # Ran out of smaller units without fitting
        unit = 'TB'
    return "%3.1f%s" % (size, unit)

make_course_rss.py

+86-51
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
''' This is a script which will take an edX course export, and create
2+
an RSS feed from it.
3+
4+
I *strongly* recommend running clean_studio_xml on a dump before
5+
running this script.
6+
7+
Limitations:
8+
9+
* This does not pay attention to release dates. To-be-released videos
10+
can appear in the RSS feed.
11+
* Courses must use Youtube videos. I use Youtube as a
12+
transcoder. Google invested millions into doing this well, and I
13+
didn't want to replicate the effort. As a result, if Google changes
14+
things around, we may need to swap things around.
15+
* We don't have course URLs by default. This sure would be nice.
16+
* It would be nice to embed pages for where we have assessments and
17+
interactives. RSS supports this, but the script does not (in part
18+
due to complexity of generating URLs).
19+
'''
20+
121
import StringIO
222
import argparse
323
import datetime
@@ -13,50 +33,62 @@
1333
import helpers
1434

1535
parser = argparse.ArgumentParser(description = "Generate an RSS feed of a course.")
16-
parser.add_argument("base", help="Base directory of Studio-dumped XML")
36+
parser.add_argument("export_base", help="Base directory of Studio-dumped XML")
1737
parser.add_argument("url_base", help="URL the feed will be hosted from")
1838
parser.add_argument("--format", help="Format of RSS feed (mp4, webm, 3gp, or m4a)", default='webm', dest='format')
1939
parser.add_argument("--course_url", help="URL of the course about page", default="https://www.edx.org/", dest="course_url")
2040

2141
args = parser.parse_args()
2242

23-
video_format = args.format
24-
url_base = args.url_base
25-
base = args.base
2643

2744
# Video format params
28-
vfp = { 'mp4': {'vyd' : 'mp4', # Youtube downloader
29-
'vfn':'mp4', # Filename extension
30-
'vmt':'video/mp4', # MIME type
31-
'vdr': 'mp4', # Directory
32-
'vcn': 'MPEG Video', # Video codec name
33-
'vdc': 'This RSS feed is for MPEG videos. This is the most common video format and should work with most software. ' # Description
34-
},
35-
'webm': {'vyd' : 'webm', # Youtube downloader
36-
'vfn':'webm', # Filename extension
37-
'vmt':'video/webm', # MIME type
38-
'vdr': 'webm', # Directory
39-
'vcn': 'WebM Video', # Video codec name
40-
'vdc': 'This RSS feed is using WebM videos. WebM is an advanced video format developed by Goolgle. This is the recommended feed if your software supports it (most software does not). ' # Description
41-
},
42-
'3gp': {'vyd' : '3gp', # Youtube downloader
43-
'vfn':'3gp', # Filename extension
44-
'vmt':'video/3gpp', # MIME type
45-
'vdr': '3gp', # Directory
46-
'vcn': '3GPP Video', # Video codec name
47-
'vdc': 'This RSS feed is for video files in the 3gpp format. 3gpp is a low-bandwidth format commonly used for video delivered to cell phones. ' # Description
48-
},
49-
'm4a': {'vyd' : '140', # Youtube downloader
50-
'vfn':'m4a', # Filename extension
51-
'vmt':'audio/mp4a-latm', # MIME type
52-
'vdr': 'm4a', # Directory
53-
'vcn': 'AAC Audio', # Video codec name
54-
'vdc': 'This is an audio-only RSS feed. It uses the AAC audio codec. ' # Description
55-
},
56-
}
57-
58-
print base
59-
tree = helpers.load_xml_course(base)
45+
video_format_parameters = { 'mp4': {'youtube_dl_code' : 'mp4',
46+
'video_extension':'mp4',
47+
'mimetype':'video/mp4',
48+
'video_codec_name': 'MPEG Video',
49+
'codec_description': 'This RSS feed is for MPEG videos. This is the most common video format and should work with most software. '
50+
},
51+
'webm': {'youtube_dl_code' : 'webm',
52+
'video_extension':'webm',
53+
'mimetype':'video/webm',
54+
'video_codec_name': 'WebM Video',
55+
'codec_description': 'This RSS feed is using WebM videos. WebM is an advanced video format developed by Google. This is the recommended feed if your software supports it (most software does not). '
56+
},
57+
'3gp': {'youtube_dl_code' : '3gp',
58+
'video_extension':'3gp',
59+
'mimetype':'video/3gpp',
60+
'video_codec_name': '3GPP Video',
61+
'codec_description': 'This RSS feed is for video files in the 3gpp format. 3gpp is a low-bandwidth format commonly used for video delivered to cell phones. '
62+
},
63+
'm4a': {'youtube_dl_code' : '140',
64+
'video_extension':'m4a',
65+
'mimetype':'audio/mp4a-latm',
66+
'video_codec_name': 'AAC Audio',
67+
'codec_description': 'This is an audio-only RSS feed. It uses the AAC audio codec. '
68+
},
69+
}
70+
71+
video_format = args.format
72+
conf = { 'video_format' : args.format,
73+
'url_base' : args.url_base,
74+
'export_base' : args.export_base,
75+
'course_url':args.course_url,
76+
'mimetype' : video_format_parameters[video_format]['mimetype'],
77+
'codec_description' : video_format_parameters[video_format]['codec_description'],
78+
'video_codec_name' : video_format_parameters[video_format]['video_codec_name'],
79+
'youtube_dl_code' : video_format_parameters[video_format]['youtube_dl_code'],
80+
'video_extension' : video_format_parameters[video_format]['video_extension'],
81+
'course_description': '''A prototype podcast of the videos from {course_name}, a course from {course_org} on edX. The full course, including assessments, is available, free-of-charge, at {course_url}. {codec_description} Note that this is a podcast of just the videos from an interactive on-line course; in some cases, the videos may be difficult to follow without integrated assessments, simulations, or other interactions at {course_url}. For a more complete experience, please visit the full course. ''',
82+
'video_description': '''{video_location}. This is a prototype podcast of the videos from {course_name}. The full course is available free-of-charge at {course_url}. Note that the full course includes assessments, as well as other interactives (such as simulations, discussions, etc.). Some videos may be difficult to follow without the integrated interactions. For a more complete experience, please visit the full course. ({pretty_length}, {duration}, {video_codec_name}) ''',
83+
}
84+
85+
print "Encoding", conf['export_base']
86+
tree = helpers.load_xml_course(conf['export_base'])
87+
88+
conf.update({'course_org' : tree.getroot().attrib['org'],
89+
'course_number' : tree.getroot().attrib['course'],
90+
'course_id' : tree.getroot().attrib['url_name'],
91+
'course_name' : tree.getroot().attrib['display_name']})
6092

6193
items = []
6294

@@ -82,29 +114,31 @@
82114
node = node.parent
83115
description.reverse()
84116

85-
item_dict['description'] = "edX RSS Prototype. Video is from "+(" / ".join(description))
86117

87-
base_filename = youtube_id+"."+vfp[video_format]['vfn']
118+
base_filename = youtube_id+"."+conf['video_extension']
88119
dl_filename = os.path.join('output', base_filename)
89120
if not os.path.exists(dl_filename):
90-
command = "youtube-dl -f {fmt} https://www.youtube.com/watch?v={uid} -o {file}".format(fmt=vfp[video_format]['vyd'],
121+
command = "youtube-dl -f {fmt} https://www.youtube.com/watch?v={uid} -o {file}".format(fmt=conf['youtube_dl_code'],
91122
uid=youtube_id,
92123
file=dl_filename)
93124
os.system(command)
94-
item_dict['enclosure'] = PyRSS2Gen.Enclosure(url=urlparse.urljoin(url_base, base_filename),
95-
length=os.stat(dl_filename).st_size,
96-
type=vfp[video_format]['vmt'])
97-
items.append(PyRSS2Gen.RSSItem(**item_dict))
125+
length = os.stat(dl_filename).st_size
126+
pretty_length = helpers.format_file_size(length)
127+
128+
item_dict['description'] = conf['video_description'].format(video_location = (" / ".join(description)),
129+
pretty_length = pretty_length,
130+
duration = helpers.youtube_entry(youtube_id)['duration_str'],
131+
**conf)
98132

99-
xml_org = tree.getroot().attrib['org']
100-
xml_course = tree.getroot().attrib['course']
101-
xml_url_name = tree.getroot().attrib['url_name']
102-
xml_course_name = tree.getroot().attrib['display_name']
133+
item_dict['enclosure'] = PyRSS2Gen.Enclosure(url=urlparse.urljoin(conf['url_base'], base_filename),
134+
length=length,
135+
type=conf['mimetype'])
136+
items.append(PyRSS2Gen.RSSItem(**item_dict))
103137

104138
rss = PyRSS2Gen.RSS2(
105139
title = tree.getroot().attrib['display_name'],
106140
link = args.course_url,
107-
description = "A prototype podcast of the videos from {coursename}, a course from {org} on edX. The full course, including assessments, is available, free-of-charge, at {course_url}. {feedtype} Note that this is an interactive course; in some cases, the videos may be difficult to follow without the integrated interactive content on http://www.edx.org.".format(coursename=xml_course_name, org=xml_org, course_url = args.course_url, feedtype = vfp[video_format]['vdc']),
141+
description = conf["course_description"].format(**conf),
108142
lastBuildDate = datetime.datetime.now(),
109143
items = items,
110144
managingEditor = "edX Learning Sciences"
@@ -113,10 +147,11 @@
113147
## Write output to a file
114148
data = StringIO.StringIO()
115149
rss.write_xml(data)
116-
output_filename = "output/{org}_{course}_{url_name}_{format}.rss".format(org = xml_org,
117-
course = xml_course,
118-
url_name = xml_url_name,
150+
output_filename = "output/{org}_{course}_{url_name}_{format}.rss".format(org = conf['course_org'],
151+
course = conf['course_number'],
152+
url_name = conf['course_id'],
119153
format = video_format)
120154
f = open(output_filename, "w")
121155
f.write(xml.dom.minidom.parseString(data.getvalue()).toprettyxml())
122156
f.close()
157+
print "Saved ", output_filename

requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
PyRSS2Gen
2+
gdata
3+
youtube-dl

0 commit comments

Comments
 (0)