Skip to content

Commit b4e7ade

Browse files
Add CR_CREATE_DATE and update CR_SUBLEVEL, CR_COU_COPYRIGHT_HOLDER (#11)
* Add course start date * Add sublevel function * Enhance docstring- remove pylint character warning * Remove cli.main from __init__.py and update README * Update README; env variable instructions * Update CR_COU_COPYRIGHT_HOLDER * Use API_BASE_URL env variable * Update poetry lock
1 parent 320a840 commit b4e7ade

File tree

12 files changed

+1374
-1116
lines changed

12 files changed

+1374
-1116
lines changed

README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,17 @@ Therefore, the above commands will generate `private/output/ocw_oer_export.csv`
4444

4545
If you want to change this, you will not only have to change the `output_path` in the function (`create_csv` or `create_json`) but also have to change the mapping in `docker-compose.yml`.
4646

47+
## Environment Variables
48+
49+
By default, this project uses MIT Open's Production API, as given in `ocw_oer_export/config.py`.
50+
To use the RC API or a local instance instead, create an environment file named `.env` in the project's root directory and add the relevant base URL:
51+
E.g., `API_BASE_URL=https://mitopen-rc.odl.mit.edu` or `API_BASE_URL=http://localhost:8063`
52+
4753
## Requirements
4854

4955
For successful execution and correct output, ensure that [MIT Open's API](https://mit-open-rc.odl.mit.edu/api/v1/courses/?platform=ocw) contains the following fields:
5056

51-
`title`, `url`, `description`, `topics`, `course_feature`, `runs: instructors`
52-
57+
`title`, `url`, `runs: level`, `description`, `topics`, `runs: instructors`, `runs: semester`, `runs: year`, `course_feature`
5358
Additionally, the `mapping_files` should be up-to-date. If new topics are added in OCW without corresponding mappings in `ocw_oer_export/mapping_files/ocw_topic_to_oer_subject.csv`, this will lead to `null` entries for those topics in the CSV (`CR_SUBJECT`). In addition to that, make sure `fm_keywords_exports.csv` is also present.
5459

5560
## Tests

ocw_oer_export/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
__all__ = ["create_json", "create_csv", "main"]
1+
__all__ = ["create_json", "create_csv"]
22

33
import logging
44

55
from .create_csv import create_csv
66
from .create_json import create_json
7-
from .cli import main
87

98
logging.root.setLevel(logging.INFO)

ocw_oer_export/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def main():
2626
parser.add_argument(
2727
"--input_path",
2828
default="/private/output/ocw_api_data.json",
29-
help="Output path for the CSV file",
29+
help="Input path for the JSON file",
3030
)
3131
parser.add_argument(
3232
"--output_path",

ocw_oer_export/config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""
2+
Module for loading environment settings and setting API base URL based on the current environment.
3+
"""
4+
import os
5+
from dotenv import load_dotenv
6+
7+
load_dotenv()
8+
9+
# Base URL of the API. An unset *or empty* API_BASE_URL falls back to the
# production host, and trailing slashes are dropped so that the joined URL
# below never contains "//api".
API_BASE_URL = (os.getenv("API_BASE_URL") or "https://mitopen.odl.mit.edu").rstrip("/")
# Full courses endpoint, filtered to the OCW platform.
API_URL = f"{API_BASE_URL}/api/v1/courses/?platform=ocw"

ocw_oer_export/constants.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

ocw_oer_export/create_csv.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from .client import extract_data_from_api
1010
from .data_handler import extract_data_from_json
11-
from .constants import API_URL
11+
from .config import API_URL
1212
from .utilities import normalize_course_url, normalize_keywords, text_cleanup
1313

1414

@@ -56,6 +56,26 @@ def create_ocw_topic_to_oer_subject_mapping(path=None, file_name=None):
5656
return {row["OCW Topic"]: row["OER Subject"] for row in reader}
5757

5858

59+
def get_cr_sublevel(levels):
    """
    Set the value(s) of CR_SUBLEVEL based on the course levels.

    Args:
        levels: Iterable of level objects, each a mapping with a "name" key
            (e.g. ``{"name": "Undergraduate"}``). ``None`` or an empty
            iterable is treated as "no levels".

    Returns:
        The distinct OER sublevels for all recognised level names, sorted
        alphabetically and joined with "|". An empty string when no level
        is recognised.
    """
    level_mappings = {
        "Undergraduate": ["Community College/Lower Division", "College/Upper Division"],
        "Graduate": ["Graduate/Professional"],
        "High School": ["High School", "Community College/Lower Division"],
        "Non-Credit": ["Career/Technical Education"],
    }
    # Level names without a mapping contribute nothing; defaulting to an empty
    # tuple avoids iterating over None (TypeError) for unknown levels.
    sublevels = {
        sublevel
        for level in levels or ()
        for sublevel in level_mappings.get(level["name"], ())
    }
    return "|".join(sorted(sublevels))
71+
72+
73+
def get_description_in_plain_text(description):
    """Return the Course Resource description as plain text (markdown and HTML cleaned up)."""
    return text_cleanup(description)
77+
78+
5979
def get_cr_subjects(ocw_topics_mapping, ocw_course_topics):
6080
"""
6181
Get OER formatted Course Resource Subjects list.
@@ -89,6 +109,22 @@ def get_cr_keywords(fm_ocw_keywords_mapping, list_of_topics_objs, course_url):
89109
return "|".join(topic["name"] for topic in list_of_topics_objs)
90110

91111

112+
def get_cr_create_date(semester, year):
    """
    Convert a semester and year into a ballpark start date.

    Args:
        semester: Semester name such as "Fall", "Spring", "Summer" or
            "January IAP". Matching ignores case and surrounding whitespace;
            any other value (including ``None``) is treated as unknown.
        year: Year of the course run. A falsy value means the year is unknown.

    Returns:
        "<year>-<MM>-<DD>" using the semester's approximate start date,
        "<year>-01-01" when only the year is usable, or "" when the year
        is missing.
    """
    if not year:
        return ""
    semester_start_dates = {
        "fall": "09-01",
        "spring": "02-01",
        "summer": "06-01",
        "january iap": "01-01",
    }
    # Normalise so that variants like "fall" or " Spring " still resolve to
    # their semester instead of silently falling back to January 1st.
    key = semester.strip().casefold() if isinstance(semester, str) else None
    start_date = semester_start_dates.get(key, "01-01")
    return f"{year}-{start_date}"
126+
127+
92128
def get_cr_authors(list_of_authors_objs):
93129
"""Get OER formatted Course Resource Authors list."""
94130
return "|".join(
@@ -136,21 +172,15 @@ def get_cr_accessibility(ocw_course_feature_tags):
136172
return "|".join(tags)
137173

138174

139-
def get_description_in_plain_text(description):
140-
"""Get Course Resource plain text description by cleaning up markdown and HTML."""
141-
cleaned_description = text_cleanup(description)
142-
return cleaned_description
143-
144-
145175
def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping):
146176
"""Transform a single course according to OER template."""
147177
course_runs = course["runs"][0]
148178
return {
149179
"CR_TITLE": course["title"],
150180
"CR_URL": course_runs["url"],
151181
"CR_MATERIAL_TYPE": "Full Course",
152-
"CR_Media_Formats": "Text/HTML",
153-
"CR_SUBLEVEL": "null",
182+
"CR_MEDIA_FORMATS": "Text/HTML",
183+
"CR_SUBLEVEL": get_cr_sublevel(course_runs["level"]),
154184
"CR_ABSTRACT": get_description_in_plain_text(course_runs["description"]),
155185
"CR_LANGUAGE": "en",
156186
"CR_COU_TITLE": "Creative Commons Attribution Non Commercial Share Alike 4.0",
@@ -159,11 +189,14 @@ def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping)
159189
"CR_KEYWORDS": get_cr_keywords(
160190
fm_ocw_keywords_mapping, course["topics"], course_runs["url"]
161191
),
192+
"CR_CREATE_DATE": get_cr_create_date(
193+
course_runs["semester"], course_runs["year"]
194+
),
162195
"CR_AUTHOR_NAME": get_cr_authors(course_runs["instructors"]),
163196
"CR_PROVIDER": "MIT",
164197
"CR_PROVIDER_SET": "MIT OpenCourseWare",
165198
"CR_COU_URL": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
166-
"CR_COU_COPYRIGHT_HOLDER": "MIT",
199+
"CR_COU_COPYRIGHT_HOLDER": get_cr_authors(course_runs["instructors"]),
167200
"CR_EDUCATIONAL_USE": get_cr_educational_use(course["course_feature"]),
168201
"CR_ACCESSIBILITY": get_cr_accessibility(course["course_feature"]),
169202
}
@@ -209,14 +242,15 @@ def create_csv(
209242
"CR_TITLE",
210243
"CR_URL",
211244
"CR_MATERIAL_TYPE",
212-
"CR_Media_Formats",
245+
"CR_MEDIA_FORMATS",
213246
"CR_SUBLEVEL",
214247
"CR_ABSTRACT",
215248
"CR_LANGUAGE",
216249
"CR_COU_TITLE",
217250
"CR_PRIMARY_USER",
218251
"CR_SUBJECT",
219252
"CR_KEYWORDS",
253+
"CR_CREATE_DATE",
220254
"CR_AUTHOR_NAME",
221255
"CR_PROVIDER",
222256
"CR_PROVIDER_SET",

ocw_oer_export/create_json.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import json
55
import logging
66

7-
from .constants import API_URL
7+
from .config import API_URL
88
from .client import extract_data_from_api
99

1010
logging.basicConfig(level=logging.INFO)

poetry.lock

Lines changed: 15 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ nodeenv = "1.8.0"
2626
platformdirs = "4.2.0"
2727
pre-commit = "3.6.2"
2828
py = "1.11.0"
29+
python-dotenv = "^1.0.1"
2930
PyYAML = "6.0.1"
3031
requests = "2.31.0"
3132
retry = "0.9.2"

0 commit comments

Comments
 (0)