Skip to content

Commit 3471fc0

Browse files
Merge pull request #154 from edx/ihassan/GSRE_2485_added_job_retire_users
chore: Add script to remove certificate files for retired users from S3
2 parents b0bba28 + 0e9793e commit 3471fc0

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../requirements.txt
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""
2+
Script to delete downloadable certificates of inactive users from S3, based on RDS MySQL database entries.
3+
4+
Usage:
5+
python retired_user_cert_remover.py --db-host=my-db-host --db-name=my-db --dry-run
6+
7+
Arguments:
8+
--db-host The RDS database host.
9+
--db-name The database name.
10+
--dry-run Run the script in dry-run mode (logs actions without deleting).
11+
--db-user The RDS database user (also settable via DB_USER env var).
12+
--db-password The RDS database password (also settable via DB_PASSWORD env var).
13+
14+
Environment Variables:
15+
DB_USER Database username (alternative to --db-user).
16+
DB_PASSWORD Database password (alternative to --db-password).
17+
18+
Functionality:
19+
- Connects to an RDS MySQL database and fetches certificates for inactive users.
20+
- Targets only certificates whose download URL contains 'https://' and whose status is 'downloadable'.
21+
- Deletes corresponding certificate files from S3 (verify and download locations).
22+
- Supports dry-run mode to simulate deletions for review.
23+
24+
Example:
25+
export DB_USER=admin
26+
export DB_PASSWORD=securepass
27+
python retired_user_cert_remover.py --db-host=mydb.amazonaws.com --db-name=edxapp --dry-run
28+
"""
29+
30+
import boto3
31+
from botocore.exceptions import ClientError
32+
import pymysql
33+
import backoff
34+
import click
35+
import sys
36+
import logging
37+
38+
# Upper bound on attempts for calls wrapped with backoff.on_exception(max_tries=MAX_TRIES).
MAX_TRIES = 5
# Configure logging
# NOTE(review): LOGGER is created here, but the functions visible in this file
# log through the root-level `logging.info` / `logging.error` helpers instead.
LOGGER = logging.getLogger(__name__)
# Root logger emits INFO and above as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
42+
43+
44+
class S3BotoWrapper:
    """Small wrapper around a boto3 S3 client whose delete call is retried."""

    def __init__(self):
        """Create the underlying boto3 S3 client and keep it on ``self.client``."""
        s3 = boto3.client("s3")
        self.client = s3

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def delete_object(self, bucket, key):
        """
        Delete ``key`` from ``bucket`` and return the client's response.

        A ``ClientError`` triggers an exponential-backoff retry, up to
        ``MAX_TRIES`` attempts in total.
        """
        response = self.client.delete_object(Bucket=bucket, Key=key)
        return response
51+
52+
53+
def fetch_certificates_to_delete(db_host, db_user, db_password, db_name):
    """
    Fetch the downloadable certificates that belong to inactive users.

    Args:
        db_host: Database host to connect to.
        db_user: Database user name.
        db_password: Password for ``db_user``.
        db_name: Name of the database to query.

    Returns:
        The rows returned by ``cursor.fetchall()``. Each row holds, in order:
        LMS_USER_ID, COURSE_RUN_ID, CERTIFICATE_ID, CERTIFICATE_URL,
        DOWNLOAD_UUID, VERIFY_UUID.

    Any exception raised while connecting, querying or closing is logged and
    the process is terminated with ``sys.exit(1)``.
    """
    try:
        connection = pymysql.connect(host=db_host, user=db_user, password=db_password, database=db_name)
        # The nested try/finally blocks guarantee the cursor and the connection
        # are closed even when the query fails part-way through.
        try:
            cursor = connection.cursor()
            try:
                logging.info("Running query on database...")
                # NOTE(review): no parameters are passed to execute(), so the
                # doubled '%%' presumably reaches MySQL unchanged; consecutive
                # LIKE wildcards should still match like a single '%' - confirm.
                cursor.execute("""
                    SELECT
                        au.id as "LMS_USER_ID",
                        gc.course_id as "COURSE_RUN_ID",
                        gc.id as "CERTIFICATE_ID",
                        gc.download_url as "CERTIFICATE_URL",
                        gc.download_uuid as "DOWNLOAD_UUID",
                        gc.verify_uuid as "VERIFY_UUID"
                    FROM
                        auth_user as au
                    JOIN
                        certificates_generatedcertificate as gc
                    ON
                        gc.user_id = au.id
                    WHERE
                        au.is_active = 0
                        AND gc.download_url LIKE '%%https://%%'
                        AND gc.status = 'downloadable'
                    ORDER BY
                        LMS_USER_ID,
                        COURSE_RUN_ID;
                """)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            connection.close()
    except Exception as ex:
        # Top-level boundary for this script: report the failure and stop.
        logging.error(f"Database query failed with error: {ex}")
        sys.exit(1)
87+
88+
89+
def delete_certificates_from_s3(certificates, dry_run, bucket="verify.edx.org"):
    """
    Delete the verify and download files of each certificate row from S3.

    Args:
        certificates: Iterable of rows in which index 2 is the certificate id,
            index 4 the DOWNLOAD_UUID and index 5 the VERIFY_UUID.
        dry_run: When true, only log what would be deleted; no S3 client is
            created and nothing is removed.
        bucket: Name of the S3 bucket holding the certificate files.

    Each key is deleted independently, so a ``ClientError`` on one key is
    logged and does not prevent the remaining keys from being processed.
    Rows with an empty UUID are skipped for that file instead of producing a
    meaningless key such as ``cert/None``.
    """
    # Only build the client when objects will really be deleted, so a dry run
    # does not depend on AWS configuration.
    s3_client = None if dry_run else S3BotoWrapper()

    for cert in certificates:
        certificate_id = cert[2]  # CERTIFICATE_ID
        download_uuid = cert[4]  # DOWNLOAD_UUID
        verify_uuid = cert[5]  # VERIFY_UUID

        keys = []
        if verify_uuid:
            keys.append(f"cert/{verify_uuid}")
        else:
            logging.warning(f"Skipping verify file for certificate {certificate_id}: empty VERIFY_UUID")
        if download_uuid:
            keys.append(f"downloads/{download_uuid}/Certificate.pdf")
        else:
            logging.warning(f"Skipping download file for certificate {certificate_id}: empty DOWNLOAD_UUID")

        for key in keys:
            if dry_run:
                logging.info(f"[Dry Run] Would delete {key} from S3")
                continue
            logging.info(f"Deleting {key} from S3...")
            try:
                s3_client.delete_object(bucket, key)
            except ClientError as e:
                # Report exactly which key failed and carry on with the rest.
                logging.error(f"Error deleting {key}: {e}")
108+
109+
110+
# Command-line entry point: query the database for certificates of inactive
# users, then delete (or, with --dry-run, only report) their files in S3.
# Documented with comments rather than a docstring so that the text click
# shows for --help stays exactly as it was.
@click.command()
@click.option('--db-host', '-h', required=True, help='Database host')
@click.option('--db-user', envvar='DB_USER', required=True, help='Database user')
@click.option('--db-password', envvar='DB_PASSWORD', required=True, help='Database password')
@click.option('--db-name', '-db', required=True, help='Database name')
@click.option('--dry-run', is_flag=True, help='Run the script in dry-run mode without making any changes')
def controller(db_host, db_user, db_password, db_name, dry_run):
    rows_to_purge = fetch_certificates_to_delete(
        db_host=db_host,
        db_user=db_user,
        db_password=db_password,
        db_name=db_name,
    )
    delete_certificates_from_s3(rows_to_purge, dry_run)
119+
120+
121+
# Invoke the click command only when this file is executed as a script.
if __name__ == '__main__':
    controller()

0 commit comments

Comments
 (0)