|
37 | 37 | },
|
38 | 38 | {
|
39 | 39 | "cell_type": "code",
|
40 |
| - "execution_count": 12, |
| 40 | + "execution_count": 4, |
| 41 | + "id": "9cf09f71-6bda-4ce8-af77-56ceb6807f98", |
| 42 | + "metadata": {}, |
| 43 | + "outputs": [ |
| 44 | + { |
| 45 | + "data": { |
| 46 | + "text/plain": [ |
| 47 | + "Counter({'X:': 375, 'Y:': 369})" |
| 48 | + ] |
| 49 | + }, |
| 50 | + "execution_count": 4, |
| 51 | + "metadata": {}, |
| 52 | + "output_type": "execute_result" |
| 53 | + } |
| 54 | + ], |
| 55 | + "source": [ |
| 56 | + "collections.Counter(result['answer'] for result in load_results(RESULTS_PATH))" |
| 57 | + ] |
| 58 | + }, |
| 59 | + { |
| 60 | + "cell_type": "code", |
| 61 | + "execution_count": 5, |
41 | 62 | "id": "aaea1b19-acce-4476-b7e2-686fb90139c1",
|
42 | 63 | "metadata": {},
|
43 | 64 | "outputs": [
|
44 | 65 | {
|
45 | 66 | "data": {
|
46 | 67 | "text/plain": [
|
47 |
| - "Counter({('A', False): 154,\n", |
48 |
| - " ('A', True): 102,\n", |
49 |
| - " ('B', True): 98,\n", |
50 |
| - " ('B', False): 47,\n", |
51 |
| - " (None, True): 1})" |
| 68 | + "Counter({('B', True): 341,\n", |
| 69 | + " ('A', False): 338,\n", |
| 70 | + " ('B', False): 34,\n", |
| 71 | + " ('A', True): 31})" |
52 | 72 | ]
|
53 | 73 | },
|
54 |
| - "execution_count": 12, |
| 74 | + "execution_count": 5, |
55 | 75 | "metadata": {},
|
56 | 76 | "output_type": "execute_result"
|
57 | 77 | }
|
58 | 78 | ],
|
59 | 79 | "source": [
|
60 |
| - "RE_ANSWER = re.compile(r'[AB12]')\n", |
61 |
| - "COALESCE = {'1': 'A', '2': 'B', 'A': 'A', 'B': 'B'}\n", |
| 80 | + "RE_ANSWER = re.compile(r'[ABXY12]')\n", |
| 81 | + "COALESCE = {'1': 'A', '2': 'B', 'A': 'A', 'B': 'B', 'Y': 'A', 'X': 'B'}\n", |
62 | 82 | "\n",
|
63 | 83 | "def parse_answer(answer):\n",
|
64 | 84 | " matches = RE_ANSWER.findall(answer)\n",
|
|
75 | 95 | },
|
76 | 96 | {
|
77 | 97 | "cell_type": "code",
|
78 |
| - "execution_count": 5, |
| 98 | + "execution_count": 6, |
79 | 99 | "id": "97593984-1e89-4b4e-833f-268e6a3ea8d1",
|
80 | 100 | "metadata": {},
|
81 | 101 | "outputs": [
|
82 | 102 | {
|
83 | 103 | "data": {
|
84 | 104 | "text/plain": [
|
85 |
| - "Counter({'A': 256, 'B': 145, None: 1})" |
| 105 | + "Counter({'B': 375, 'A': 369})" |
86 | 106 | ]
|
87 | 107 | },
|
88 |
| - "execution_count": 5, |
| 108 | + "execution_count": 6, |
89 | 109 | "metadata": {},
|
90 | 110 | "output_type": "execute_result"
|
91 | 111 | }
|
92 | 112 | ],
|
93 | 113 | "source": [
|
94 | 114 | "collections.Counter(\n",
|
95 |
| - " (parse_answer(result['answer']))\n", |
| 115 | + " parse_answer(result['answer'])\n", |
96 | 116 | " for result in load_results(RESULTS_PATH)\n",
|
97 | 117 | ")"
|
98 | 118 | ]
|
|
119 | 139 | "metadata": {},
|
120 | 140 | "outputs": [
|
121 | 141 | {
|
122 |
| - "data": { |
123 |
| - "text/plain": [ |
124 |
| - "{'AB': 61, 'AA': 60, 'BA': 16, 'BB': 13, 'U': 0}" |
125 |
| - ] |
126 |
| - }, |
127 |
| - "execution_count": 8, |
128 |
| - "metadata": {}, |
129 |
| - "output_type": "execute_result" |
| 142 | + "name": "stdout", |
| 143 | + "output_type": "stream", |
| 144 | + "text": [ |
| 145 | + "Accuracy: 84% Breakdown: AB=179 AA=8 BA=11 BB=14 U=0\n" |
| 146 | + ] |
130 | 147 | }
|
131 | 148 | ],
|
132 | 149 | "source": [
|
|
161 | 178 | " return score\n",
|
162 | 179 | "\n",
|
163 | 180 | "\n",
|
164 |
| - "stable_score(aggregate_swapped_answers(load_results(RESULTS_PATH)))" |
| 181 | + "scores = stable_score(aggregate_swapped_answers(load_results(RESULTS_PATH)))\n", |
| 182 | + "acc = int(100 * scores['AB'] / sum(scores.values()))\n", |
| 183 | + "scores_fmt = ' '.join(f'{k}={v}' for k, v in scores.items())\n", |
| 184 | + "print(f'Accuracy: {acc}% Breakdown: {scores_fmt}')" |
165 | 185 | ]
|
166 | 186 | },
|
167 | 187 | {
|
168 | 188 | "cell_type": "code",
|
169 | 189 | "execution_count": 9,
|
170 |
| - "id": "78dba699-28e8-497f-bd59-77fb4da48278", |
171 |
| - "metadata": {}, |
172 |
| - "outputs": [ |
173 |
| - { |
174 |
| - "data": { |
175 |
| - "text/plain": [ |
176 |
| - "{\"'terrorist' and'd ** k' can also be included in messages.\": {True: None}}" |
177 |
| - ] |
178 |
| - }, |
179 |
| - "execution_count": 9, |
180 |
| - "metadata": {}, |
181 |
| - "output_type": "execute_result" |
182 |
| - } |
183 |
| - ], |
184 |
| - "source": [ |
185 |
| - "results = [{'article_id': '6aa21c738f5c5c2fa7b1cad4f3dba97cc9634878', 'article_sent': \"'terrorist' and'd ** k' can also be included in messages.\", 'correct_sent': \"** k' can also be included in messages.\", 'correct_sent_id': '82-2', 'incorrect_sent': \"** k'' can also be included in messages.\", 'incorrect_sent_id': '68-2', 'answer': ':', 'swap': True}]\n", |
186 |
| - "aggregate_swapped_answers(results)" |
187 |
| - ] |
188 |
| - }, |
189 |
| - { |
190 |
| - "cell_type": "code", |
191 |
| - "execution_count": 10, |
192 | 190 | "id": "da842095-9d47-49cf-80d3-facb795f097b",
|
193 | 191 | "metadata": {},
|
194 | 192 | "outputs": [
|
195 | 193 | {
|
196 | 194 | "data": {
|
197 | 195 | "text/plain": [
|
198 |
| - "Counter({True: 252, False: 149, None: 1})" |
| 196 | + "Counter({True: 679, False: 65})" |
199 | 197 | ]
|
200 | 198 | },
|
201 |
| - "execution_count": 10, |
| 199 | + "execution_count": 9, |
202 | 200 | "metadata": {},
|
203 | 201 | "output_type": "execute_result"
|
204 | 202 | }
|
|
209 | 207 | " for result in load_results(RESULTS_PATH)\n",
|
210 | 208 | ")"
|
211 | 209 | ]
|
212 |
| - }, |
213 |
| - { |
214 |
| - "cell_type": "code", |
215 |
| - "execution_count": null, |
216 |
| - "id": "5c1f0345-1a13-42ee-b545-68f56fb70770", |
217 |
| - "metadata": {}, |
218 |
| - "outputs": [], |
219 |
| - "source": [] |
220 | 210 | }
|
221 | 211 | ],
|
222 | 212 | "metadata": {
|
|
0 commit comments