|
| 1 | +''' This script takes an edX course export and creates |
| 2 | +an RSS feed from it. |
| 3 | +
|
| 4 | +I *strongly* recommend running clean_studio_xml on a dump before |
| 5 | +running this script. |
| 6 | +
|
| 7 | +Limitations: |
| 8 | +
|
| 9 | +* This does not pay attention to release dates. To-be-released videos |
| 10 | + can appear in the RSS feed. |
| 11 | +* Courses must use YouTube videos. I use YouTube as a |
| 12 | + transcoder. Google invested millions into doing this well, and I |
| 13 | + didn't want to replicate the effort. As a result, if Google changes |
| 14 | + how YouTube works, this script may need to change to match. |
| 15 | +* We don't have course URLs by default. Having them would be nice. |
| 16 | +* It would be nice to embed pages wherever the course has assessments and |
| 17 | + interactives. RSS supports this, but the script does not (in part |
| 18 | + due to the complexity of generating URLs). |
| 19 | +''' |
| 20 | + |
1 | 21 | import StringIO
|
2 | 22 | import argparse
|
3 | 23 | import datetime
|
|
13 | 33 | import helpers
|
14 | 34 |
|
15 | 35 | parser = argparse.ArgumentParser(description = "Generate an RSS feed of a course.")
|
16 |
| -parser.add_argument("base", help="Base directory of Studio-dumped XML") |
| 36 | +parser.add_argument("export_base", help="Base directory of Studio-dumped XML") |
17 | 37 | parser.add_argument("url_base", help="URL the feed will be hosted from")
|
18 | 38 | parser.add_argument("--format", help="Format of RSS feed (mp4, webm, 3gp, or m4a)", default='webm', dest='format')
|
19 | 39 | parser.add_argument("--course_url", help="URL of the course about page", default="https://www.edx.org/", dest="course_url")
|
20 | 40 |
|
21 | 41 | args = parser.parse_args()
|
22 | 42 |
|
23 |
| -video_format = args.format |
24 |
| -url_base = args.url_base |
25 |
| -base = args.base |
26 | 43 |
|
27 | 44 | # Video format params
|
28 |
| -vfp = { 'mp4': {'vyd' : 'mp4', # Youtube downloader |
29 |
| - 'vfn':'mp4', # Filename extension |
30 |
| - 'vmt':'video/mp4', # MIME type |
31 |
| - 'vdr': 'mp4', # Directory |
32 |
| - 'vcn': 'MPEG Video', # Video codec name |
33 |
| - 'vdc': 'This RSS feed is for MPEG videos. This is the most common video format and should work with most software. ' # Description |
34 |
| - }, |
35 |
| - 'webm': {'vyd' : 'webm', # Youtube downloader |
36 |
| - 'vfn':'webm', # Filename extension |
37 |
| - 'vmt':'video/webm', # MIME type |
38 |
| - 'vdr': 'webm', # Directory |
39 |
| - 'vcn': 'WebM Video', # Video codec name |
40 |
| - 'vdc': 'This RSS feed is using WebM videos. WebM is an advanced video format developed by Goolgle. This is the recommended feed if your software supports it (most software does not). ' # Description |
41 |
| - }, |
42 |
| - '3gp': {'vyd' : '3gp', # Youtube downloader |
43 |
| - 'vfn':'3gp', # Filename extension |
44 |
| - 'vmt':'video/3gpp', # MIME type |
45 |
| - 'vdr': '3gp', # Directory |
46 |
| - 'vcn': '3GPP Video', # Video codec name |
47 |
| - 'vdc': 'This RSS feed is for video files in the 3gpp format. 3gpp is a low-bandwidth format commonly used for video delivered to cell phones. ' # Description |
48 |
| - }, |
49 |
| - 'm4a': {'vyd' : '140', # Youtube downloader |
50 |
| - 'vfn':'m4a', # Filename extension |
51 |
| - 'vmt':'audio/mp4a-latm', # MIME type |
52 |
| - 'vdr': 'm4a', # Directory |
53 |
| - 'vcn': 'AAC Audio', # Video codec name |
54 |
| - 'vdc': 'This is an audio-only RSS feed. It uses the AAC audio codec. ' # Description |
55 |
| - }, |
56 |
| - } |
57 |
| - |
58 |
| -print base |
59 |
| -tree = helpers.load_xml_course(base) |
| 45 | +video_format_parameters = { 'mp4': {'youtube_dl_code' : 'mp4', |
| 46 | + 'video_extension':'mp4', |
| 47 | + 'mimetype':'video/mp4', |
| 48 | + 'video_codec_name': 'MPEG Video', |
| 49 | + 'codec_description': 'This RSS feed is for MPEG videos. This is the most common video format and should work with most software. ' |
| 50 | + }, |
| 51 | + 'webm': {'youtube_dl_code' : 'webm', |
| 52 | + 'video_extension':'webm', |
| 53 | + 'mimetype':'video/webm', |
| 54 | + 'video_codec_name': 'WebM Video', |
| 55 | + 'codec_description': 'This RSS feed is using WebM videos. WebM is an advanced video format developed by Google. This is the recommended feed if your software supports it (most software does not). ' |
| 56 | + }, |
| 57 | + '3gp': {'youtube_dl_code' : '3gp', |
| 58 | + 'video_extension':'3gp', |
| 59 | + 'mimetype':'video/3gpp', |
| 60 | + 'video_codec_name': '3GPP Video', |
| 61 | + 'codec_description': 'This RSS feed is for video files in the 3gpp format. 3gpp is a low-bandwidth format commonly used for video delivered to cell phones. ' |
| 62 | + }, |
| 63 | + 'm4a': {'youtube_dl_code' : '140', |
| 64 | + 'video_extension':'m4a', |
| 65 | + 'mimetype':'audio/mp4a-latm', |
| 66 | + 'video_codec_name': 'AAC Audio', |
| 67 | + 'codec_description': 'This is an audio-only RSS feed. It uses the AAC audio codec. ' |
| 68 | + }, |
| 69 | + } |
| 70 | + |
| 71 | +video_format = args.format |
| 72 | +conf = { 'video_format' : args.format, |
| 73 | + 'url_base' : args.url_base, |
| 74 | + 'export_base' : args.export_base, |
| 75 | + 'course_url':args.course_url, |
| 76 | + 'mimetype' : video_format_parameters[video_format]['mimetype'], |
| 77 | + 'codec_description' : video_format_parameters[video_format]['codec_description'], |
| 78 | + 'video_codec_name' : video_format_parameters[video_format]['video_codec_name'], |
| 79 | + 'youtube_dl_code' : video_format_parameters[video_format]['youtube_dl_code'], |
| 80 | + 'video_extension' : video_format_parameters[video_format]['video_extension'], |
| 81 | + 'course_description': '''A prototype podcast of the videos from {course_name}, a course from {course_org} on edX. The full course, including assessments, is available, free-of-charge, at {course_url}. {codec_description} Note that this is a podcast of just the videos from an interactive on-line course; in some cases, the videos may be difficult to follow without integrated assessments, simulations, or other interactions at {course_url}. For a more complete experience, please visit the full course. ''', |
| 82 | + 'video_description': '''{video_location}. This is a prototype podcast of the videos from {course_name}. The full course is available free-of-charge at {course_url}. Note that the full course includes assessments, as well as other interactives (such as simulations, discussions, etc.). Some videos may be difficult to follow without the integrated interactions. For a more complete experience, please visit the full course. ({pretty_length}, {duration}, {video_codec_name}) ''', |
| 83 | + } |
| 84 | + |
| 85 | +print "Encoding", conf['export_base'] |
| 86 | +tree = helpers.load_xml_course(conf['export_base']) |
| 87 | + |
| 88 | +conf.update({'course_org' : tree.getroot().attrib['org'], |
| 89 | + 'course_number' : tree.getroot().attrib['course'], |
| 90 | + 'course_id' : tree.getroot().attrib['url_name'], |
| 91 | + 'course_name' : tree.getroot().attrib['display_name']}) |
60 | 92 |
|
61 | 93 | items = []
|
62 | 94 |
|
|
82 | 114 | node = node.parent
|
83 | 115 | description.reverse()
|
84 | 116 |
|
85 |
| - item_dict['description'] = "edX RSS Prototype. Video is from "+(" / ".join(description)) |
86 | 117 |
|
87 |
| - base_filename = youtube_id+"."+vfp[video_format]['vfn'] |
| 118 | + base_filename = youtube_id+"."+conf['video_extension'] |
88 | 119 | dl_filename = os.path.join('output', base_filename)
|
89 | 120 | if not os.path.exists(dl_filename):
|
90 |
| - command = "youtube-dl -f {fmt} https://www.youtube.com/watch?v={uid} -o {file}".format(fmt=vfp[video_format]['vyd'], |
| 121 | + command = "youtube-dl -f {fmt} https://www.youtube.com/watch?v={uid} -o {file}".format(fmt=conf['youtube_dl_code'], |
91 | 122 | uid=youtube_id,
|
92 | 123 | file=dl_filename)
|
93 | 124 | os.system(command)
|
94 |
| - item_dict['enclosure'] = PyRSS2Gen.Enclosure(url=urlparse.urljoin(url_base, base_filename), |
95 |
| - length=os.stat(dl_filename).st_size, |
96 |
| - type=vfp[video_format]['vmt']) |
97 |
| - items.append(PyRSS2Gen.RSSItem(**item_dict)) |
| 125 | + length = os.stat(dl_filename).st_size |
| 126 | + pretty_length = helpers.format_file_size(length) |
| 127 | + |
| 128 | + item_dict['description'] = conf['video_description'].format(video_location = (" / ".join(description)), |
| 129 | + pretty_length = pretty_length, |
| 130 | + duration = helpers.youtube_entry(youtube_id)['duration_str'], |
| 131 | + **conf) |
98 | 132 |
|
99 |
| -xml_org = tree.getroot().attrib['org'] |
100 |
| -xml_course = tree.getroot().attrib['course'] |
101 |
| -xml_url_name = tree.getroot().attrib['url_name'] |
102 |
| -xml_course_name = tree.getroot().attrib['display_name'] |
| 133 | + item_dict['enclosure'] = PyRSS2Gen.Enclosure(url=urlparse.urljoin(conf['url_base'], base_filename), |
| 134 | + length=length, |
| 135 | + type=conf['mimetype']) |
| 136 | + items.append(PyRSS2Gen.RSSItem(**item_dict)) |
103 | 137 |
|
104 | 138 | rss = PyRSS2Gen.RSS2(
|
105 | 139 | title = tree.getroot().attrib['display_name'],
|
106 | 140 | link = args.course_url,
|
107 |
| - description = "A prototype podcast of the videos from {coursename}, a course from {org} on edX. The full course, including assessments, is available, free-of-charge, at {course_url}. {feedtype} Note that this is an interactive course; in some cases, the videos may be difficult to follow without the integrated interactive content on http://www.edx.org.".format(coursename=xml_course_name, org=xml_org, course_url = args.course_url, feedtype = vfp[video_format]['vdc']), |
| 141 | + description = conf["course_description"].format(**conf), |
108 | 142 | lastBuildDate = datetime.datetime.now(),
|
109 | 143 | items = items,
|
110 | 144 | managingEditor = "edX Learning Sciences"
|
|
113 | 147 | ## Write output to a file
|
114 | 148 | data = StringIO.StringIO()
|
115 | 149 | rss.write_xml(data)
|
116 |
| -output_filename = "output/{org}_{course}_{url_name}_{format}.rss".format(org = xml_org, |
117 |
| - course = xml_course, |
118 |
| - url_name = xml_url_name, |
| 150 | +output_filename = "output/{org}_{course}_{url_name}_{format}.rss".format(org = conf['course_org'], |
| 151 | + course = conf['course_number'], |
| 152 | + url_name = conf['course_id'], |
119 | 153 | format = video_format)
|
120 | 154 | f = open(output_filename, "w")
|
121 | 155 | f.write(xml.dom.minidom.parseString(data.getvalue()).toprettyxml())
|
122 | 156 | f.close()
|
| 157 | +print "Saved ", output_filename |
0 commit comments