Skip to content

Commit 90acd3d

Browse files
committed
Merge develop back in
2 parents f54b07e + 08210a1 commit 90acd3d

18 files changed

+810
-6
lines changed

CHANGELOG.md

+26
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,30 @@
11
# Changelog
2+
3+
# 0.10.28
4+
1. Add intent label tasks
5+
([#646](https://github.com/praekeltfoundation/ndoh-hub/pull/646))
6+
7+
# 0.10.27
8+
1. Bump django from 4.2.15 to 4.2.16
9+
([#628](https://github.com/praekeltfoundation/ndoh-hub/pull/628))
10+
2. Update CHANGELOG
11+
([#636](https://github.com/praekeltfoundation/ndoh-hub/pull/636))
12+
3. add test coverage to PR comment using py-cov-action
13+
([#638](https://github.com/praekeltfoundation/ndoh-hub/pull/638))
14+
4. update gibberish detection function
15+
([#640](https://github.com/praekeltfoundation/ndoh-hub/pull/640))
16+
5. update gibberish response
17+
([#641](https://github.com/praekeltfoundation/ndoh-hub/pull/641))
18+
6. update gibberish response
19+
([#642](https://github.com/praekeltfoundation/ndoh-hub/pull/642))
20+
7. update gibberish response dict
21+
([#643](https://github.com/praekeltfoundation/ndoh-hub/pull/643))
22+
8. return a dict in utils when gibberish detected
23+
([#644](https://github.com/praekeltfoundation/ndoh-hub/pull/644))
24+
# 0.10.26
25+
1. Remove seaworthy from hub
26+
([#635](https://github.com/praekeltfoundation/ndoh-hub/pull/635))
27+
([#634](https://github.com/praekeltfoundation/ndoh-hub/pull/634))
228
# 0.10.25
329
1. Remove mqr from hub
430
([#633](https://github.com/praekeltfoundation/ndoh-hub/pull/633))

aaq/tests/test_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,6 @@ def test_search_gibberish(self):
133133
search_request = responses.calls[0]
134134

135135
assert search_request.response.status_code == 400
136-
assert response.data == {
136+
assert response == {
137137
"message": "Gibberish Detected",
138138
}

aaq/utils.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import requests
44
from django.conf import settings
55
from rest_framework import status
6-
from rest_framework.response import Response
76

87

98
def check_urgency_v2(message_text):
@@ -41,10 +40,7 @@ def search(query_text, generate_llm_response, query_metadata):
4140
):
4241
error_detail = response.json().get("detail", "")
4342
if "Gibberish text detected" in error_detail:
44-
json_msg = {
45-
"message": "Gibberish Detected",
46-
}
47-
return Response(json_msg, status=status.HTTP_200_OK)
43+
return {"message": "Gibberish Detected"}
4844

4945
response.raise_for_status()
5046

eventstore/tasks.py

+44
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from celery.exceptions import SoftTimeLimitExceeded
1313
from django.conf import settings
1414
from django.utils import dateparse, timezone
15+
from requests.auth import HTTPBasicAuth
1516
from requests.exceptions import RequestException
1617
from temba_client.exceptions import TembaHttpError
1718

@@ -778,3 +779,46 @@ def process_whatsapp_template_send_status():
778779
status.status = WhatsAppTemplateSendStatus.Status.ACTION_COMPLETED
779780
status.action_completed_at = timezone.now()
780781
status.save()
782+
783+
784+
@app.task(
785+
autoretry_for=(RequestException, SoftTimeLimitExceeded),
786+
retry_backoff=True,
787+
max_retries=15,
788+
acks_late=True,
789+
soft_time_limit=10,
790+
time_limit=15,
791+
)
792+
def get_inbound_intent(text):
793+
params = {"question": text}
794+
response = requests.get(
795+
urljoin(settings.INTENT_CLASSIFIER_URL, "/nlu/"),
796+
params=params,
797+
auth=HTTPBasicAuth(
798+
settings.INTENT_CLASSIFIER_USER, settings.INTENT_CLASSIFIER_PASS
799+
),
800+
)
801+
response.raise_for_status()
802+
return response.json()["intent"]
803+
804+
805+
@app.task(
806+
autoretry_for=(RequestException, SoftTimeLimitExceeded),
807+
retry_backoff=True,
808+
max_retries=15,
809+
acks_late=True,
810+
soft_time_limit=10,
811+
time_limit=15,
812+
)
813+
def label_whatsapp_message(label, message_id):
814+
headers = {
815+
"Authorization": "Bearer {}".format(settings.TURN_TOKEN),
816+
"content-type": "application/json",
817+
"Accept": "application/vnd.v1+json",
818+
}
819+
response = requests.post(
820+
urljoin(settings.TURN_URL, f"/v1/messages/{message_id}/labels"),
821+
json={"labels": [label]},
822+
headers=headers,
823+
)
824+
response.raise_for_status()

eventstore/tests/test_whatsapp_actions.py

+71
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from datetime import timedelta
23
from unittest.mock import Mock, patch
34

@@ -203,6 +204,76 @@ def test_handle_edd_label_disabled(self):
203204
handle_inbound(message)
204205
handle.assert_not_called()
205206

207+
@responses.activate
208+
def test_intent_classification(self):
209+
inbound_text = "The test inbound message body"
210+
responses.add(
211+
responses.GET,
212+
"http://intent-classifier/nlu/",
213+
json={
214+
"question": "The test inbound message body",
215+
"intent": "Test Label",
216+
"confidence": 90,
217+
},
218+
)
219+
220+
responses.add(
221+
responses.POST, "http://turn/v1/messages/msg-id-1/labels", json={}
222+
)
223+
224+
message = Mock()
225+
message.has_label.return_value = False
226+
message.id = "msg-id-1"
227+
message.type = "text"
228+
message.data = {"text": {"body": inbound_text}}
229+
230+
handle_inbound(message)
231+
232+
[intent_call, label_call] = responses.calls
233+
234+
self.assertEqual(intent_call.request.params, {"question": inbound_text})
235+
self.assertEqual(
236+
intent_call.request.headers["Authorization"],
237+
"Basic bmx1X3VzZXI6bmx1X3Bhc3M=",
238+
)
239+
240+
self.assertEqual(
241+
json.loads(label_call.request.body), {"labels": ["Test Label"]}
242+
)
243+
self.assertEqual(
244+
label_call.request.headers["Authorization"],
245+
"Bearer turn-token",
246+
)
247+
248+
@responses.activate
249+
@override_settings(INTENT_CLASSIFIER_URL=None)
250+
def test_intent_classification_disabled(self):
251+
inbound_text = "The test inbound message body"
252+
253+
message = Mock()
254+
message.has_label.return_value = False
255+
message.id = "msg-id-1"
256+
message.type = "text"
257+
message.data = {"text": {"body": inbound_text}}
258+
259+
handle_inbound(message)
260+
261+
self.assertEqual(len(responses.calls), 0)
262+
263+
@responses.activate
264+
def test_intent_classification_yes(self):
265+
inbound_text = "YES"
266+
267+
message = Mock()
268+
message.has_label.return_value = False
269+
message.id = "msg-id-1"
270+
message.type = "text"
271+
message.data = {"text": {"body": inbound_text}}
272+
273+
handle_inbound(message)
274+
275+
self.assertEqual(len(responses.calls), 0)
276+
206277

207278
class UpdateRapidproAlertOptoutTests(DjangoTestCase):
208279
def test_contact_update_is_called(self):

eventstore/whatsapp_actions.py

+10
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
from eventstore.models import SMS_CHANNELTYPE, DeliveryFailure, Event, OptOut
55
from eventstore.tasks import (
66
async_create_flow_start,
7+
get_inbound_intent,
78
get_rapidpro_contact_by_msisdn,
9+
label_whatsapp_message,
810
send_helpdesk_response_to_dhis2,
911
update_rapidpro_contact,
1012
)
@@ -57,6 +59,14 @@ def handle_inbound(message):
5759
if not settings.DISABLE_EDD_LABEL_FLOW and message.has_label("EDD ISSUE"):
5860
handle_edd_message(message)
5961

62+
if settings.INTENT_CLASSIFIER_URL and message.type == "text":
63+
text = message.data["text"]["body"]
64+
if text.lower() != "yes":
65+
chain(
66+
get_inbound_intent.s(),
67+
label_whatsapp_message.s(message.id),
68+
).delay(text)
69+
6070

6171
def update_rapidpro_alert_optout(message):
6272
update_rapidpro_contact.delay(

ndoh_hub/settings.py

+4
Original file line numberDiff line numberDiff line change
@@ -408,3 +408,7 @@
408408
WHATSAPP_TEMPLATE_SEND_TIMEOUT_HOURS = env.int(
409409
"WHATSAPP_TEMPLATE_SEND_TIMEOUT_HOURS", 3
410410
)
411+
412+
INTENT_CLASSIFIER_URL = env.str("INTENT_CLASSIFIER_URL", None)
413+
INTENT_CLASSIFIER_USER = env.str("INTENT_CLASSIFIER_USER", None)
414+
INTENT_CLASSIFIER_PASS = env.str("INTENT_CLASSIFIER_PASS", None)

ndoh_hub/testsettings.py

+4
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,7 @@
2626
AAQ_CORE_API_URL = "http://aaqcore"
2727
AAQ_UD_API_URL = "http://aaqud"
2828
AAQ_V2_API_URL = "http://aaq_v2"
29+
30+
INTENT_CLASSIFIER_URL = "http://intent-classifier"
31+
INTENT_CLASSIFIER_USER = "nlu_user"
32+
INTENT_CLASSIFIER_PASS = "nlu_pass"

scripts/migrate_to_turn/README.md

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Migrate contacts from Rapidpro to Turn
2+
3+
The scripts in this folder will be used to get all the contact information from Rapidpro and update Turn contacts.
4+
5+
The fetch part is based on the modified date we have on Rapidpro contacts and the the idea is to start backfilling before we go live.
6+
We can do this in batches and repeat until we do the actual switch over from Rapidpro to Turn.
7+
8+
## SCRIPTS
9+
10+
There is one fetch script and 2 update script options. We can test out the different update option with larger batches and see which one works the best.
11+
12+
### fetch_rapidpro_contacts.py
13+
14+
This fetches all the contacts from Rapidpro based on the start and end date provided, you can also configure a limit.
15+
16+
The `FIELD_MAPPING` variable should be updated with all the fields we want to move over.
17+
18+
The script will write all the contacts to a file with the start and end date in the name.
19+
20+
It will also output the latest modified on date in the batch, this can then be used as a start date to get the next batch.
21+
22+
### update_turn_contacts.py
23+
24+
Update Turn contacts using the Turn contacts api asynchronously.
25+
26+
This script takes a filename of a file generated by the `fetch_rapidpro_contacts.py` script as a parameter and updates all the contact in the file on Turn.
27+
28+
It is an async script and `CONCURRENCY` can be updated to control the speed, to avoid hitting the Turn API rate limits.
29+
30+
Command to run:
31+
`python scripts/migrate_to_turn/update_turn_contacts.py contacts-2025-01-01-2025-01-07.csv > update_turn_contacts.json`
32+
33+
The output is sent to a json file, which can be used to retry failed requests.
34+
35+
### update_turn_contacts_queue.py
36+
37+
Update Turn contacts using the Turn contacts api asynchronously but using a queue and workers. It will sleep if it gets rate limited by Turn.
38+
39+
This script takes a filename of a file generated by the `fetch_rapidpro_contacts.py` script as a parameter and updates all the contact in the file on Turn.
40+
41+
It is an async script and `WORKER_COUNT` can be configured, to change the amount being processed at a time.
42+
43+
Command to run:
44+
`python scripts/migrate_to_turn/update_turn_contacts_queue.py contacts-2025-01-01-2025-01-07.csv > update_turn_contacts.json`
45+
46+
The output is sent to a json file, which can be used to retry failed requests.
47+
48+
### update_turn_contacts_bulk.py
49+
50+
Update Turn contacts using the Turn bulk update contacts API. The API is currently limited to allow files of a maximum of 1 Megabyte in size.
51+
52+
This script takes the csv provided and sends it directly yto the Turn API. It will output the results to a new csv file.
53+
54+
Command to run:
55+
`python scripts/migrate_to_turn/update_turn_contacts_bulk.py contacts-2024-01-01-2025-01-07.csv`
56+
57+
### compare_contacts.py
58+
59+
This script can be used to compare specific contacts.
60+
61+
Add the WhatsApp IDs of the contacts you want to compare to the WA_IDS list in the script.
62+
63+
The script will get their Rapidpro and Turn contact details and output everything to a `compare.csv` file.
64+
65+
Command to run:
66+
`python scripts/migrate_to_turn/compare_contacts.py`
67+
68+
## FIELD_MAPPING
69+
70+
This is a dictionary the script uses to figure out where to get the data, how to process it and where it should go.
71+
72+
The key is the field name in Rapidpro, the value determinues the rest:
73+
74+
`turn_name` - The destination field name in Turn
75+
76+
`process` - The function to call to process the value, like change datetime to format Turn understands. The functions live in the `process_fields.py` file and that's where any new process field functions should be added.
77+
78+
`type` - Where the value comes from, `default` Rapidpro contacts field or a `custom` one added by Reach.
79+
80+
## STEPS
81+
82+
1. Make sure the `FIELD_MAPPING` is configured
83+
1. Set the required environment variables: `TURN_TOKEN` and `RAPIDPRO_TOKEN`.
84+
1. Update the start date, end date, limit and field mapping in `fetch_rapidpro_contacts.py` script
85+
1. Run `python scripts/migrate_to_turn/fetch_rapidpro_contacts.py` and take note of the last modified date and the filename.
86+
1. Run `python scripts/migrate_to_turn/update_turn_contacts.py contacts-2025-01-01-2025-01-07.csv > update_turn_contacts.json`
87+
1. Use jq to check if there were any errors `jq .response.status update_turn_contacts.json | sort | uniq -c`
88+
1. To retry errors, run `cat update_turn_contacts.json | python scripts/migrate_to_rapidpro/retry_requests.py > update_turn_contacts2.json`
89+
1. Repeat previous two steps until all contacts successfully completed.
90+
1. Update the start and end date in `fetch_rapidpro_contacts.py` script. Repeat from step 3.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import csv
2+
import os
3+
from urllib.parse import urljoin
4+
5+
import requests
6+
from fetch_rapidpro_contacts import FIELD_MAPPING
7+
from temba_client.v2 import TembaClient
8+
9+
RAPIDPRO_URL = "https://rapidpro.qa.momconnect.co.za"
10+
TURN_URL = "https://whatsapp-praekelt-cloud.turn.io"
11+
12+
WA_IDS = ["27836378531"]
13+
14+
rapidpro_client = TembaClient(RAPIDPRO_URL, os.environ["RAPIDPRO_TOKEN"])
15+
16+
17+
def get_rapidpro_contact(wa_id):
18+
urn = f"whatsapp:{wa_id}"
19+
contact = rapidpro_client.get_contacts(urn=urn).first(retry_on_rate_exceed=True)
20+
data = {}
21+
22+
for rapidpro_field, turn_details in FIELD_MAPPING.items():
23+
if turn_details["type"] == "default":
24+
data[rapidpro_field] = getattr(contact, rapidpro_field)
25+
else:
26+
data[rapidpro_field] = contact.fields[rapidpro_field]
27+
28+
return data
29+
30+
31+
def get_turn_contact(wa_id):
32+
headers = {
33+
"Authorization": "Bearer {}".format(os.environ["TURN_TOKEN"]),
34+
"content-type": "application/json",
35+
"Accept": "application/vnd.v1+json",
36+
}
37+
response = requests.get(
38+
urljoin(TURN_URL, "/v1/contacts/{}/profile".format(wa_id)),
39+
headers=headers,
40+
)
41+
contact = response.json()["fields"]
42+
43+
data = {}
44+
for rapidpro_field, turn_details in FIELD_MAPPING.items():
45+
data[rapidpro_field] = contact[turn_details["turn_name"]]
46+
return data
47+
48+
49+
def compare_contacts():
50+
rows = []
51+
for wa_id in WA_IDS:
52+
rapidpro_data = get_rapidpro_contact(wa_id)
53+
turn_data = get_turn_contact(wa_id)
54+
55+
row = {"wa_id": wa_id}
56+
for rapidpro_field in FIELD_MAPPING.keys():
57+
row[f"RP {rapidpro_field}"] = rapidpro_data[rapidpro_field]
58+
row[f"TURN {rapidpro_field}"] = turn_data[rapidpro_field]
59+
60+
rows.append(row)
61+
62+
with open("compare.csv", "w", encoding="utf-8") as f:
63+
fieldnames = rows[0].keys()
64+
writer = csv.DictWriter(f, fieldnames=fieldnames)
65+
writer.writeheader()
66+
writer.writerows(rows)
67+
68+
69+
if __name__ == "__main__":
70+
compare_contacts()

0 commit comments

Comments
 (0)