File tree Expand file tree Collapse file tree 1 file changed +272
-0
lines changed Expand file tree Collapse file tree 1 file changed +272
-0
lines changed Original file line number Diff line number Diff line change
1
namespace :scihist do
  namespace :data_fixes do
    desc <<~DESC
      Set ocr_requested to true for a new batch of works (june 2024)
        rake scihist:data_fixes:set_ocr_requested_june_2024

      Do just one:
        rake scihist:data_fixes:set_ocr_requested_june_2024['aso1ecq']
    DESC
    # Marks each listed work as ocr_requested, saves it, and enqueues a
    # WorkOcrCreatorRemoverJob for it on the "special_jobs" queue.
    #
    # Task argument:
    #   work_friendlier_id - optional; when present, only that one work is
    #                        processed instead of the built-in batch below.
    #
    # Works that cannot be found, or that already have ocr_requested set, are
    # logged and skipped. `save!` raises on failure, which aborts the run.
    task :set_ocr_requested_june_2024, [:work_friendlier_id] => :environment do |_t, args|
      # Friendlier ids of the works in the June 2024 batch.
      items_needing_ocr = %w[
        00000059p
        10el2db
        1abq6cv
        1ndf3hf
        1v53jx89f
        1x93vdg
        1z40kt41h
        1zshph4
        20kxw7k
        25dyk6p
        27mo804
        2z10wr23f
        37720d24n
        3dbh0ng
        3dc72as
        3j333301b
        3j3333398
        3n203z173
        3nz58ef
        3r074v594
        41687j69c
        46smfc1
        4b29b690d
        4dcl1aq
        4hml3f5
        4qlor3l
        4w5cwnu
        4x51hj81r
        50cnjm9
        58s4wy3
        5999n455n
        5999n456x
        5h73pw14h
        5l15b5i
        5m60qs97h
        5sgmjgh
        5w3x3vu
        6108vb34k
        6d56zx16p
        6hz4gzn
        6t053h17b
        6w924c48b
        73x6up6
        7fmih3e
        7g7sykv
        7hlt38t
        7kssjdm
        7p05mi5
        7r9fbhw
        7ucpb1m
        7uk1dod
        7w62f955m
        8910jv725
        8c97kq63g
        8g84mm76s
        8lelsrz
        8r00347
        8vd3kbc
        9306sz691
        9880vr88p
        9ag3q0u
        9c67wn74v
        9chrkef
        9hu7g0h
        9k41zd82t
        9s161738c
        9w0323862
        a25w764
        actytlk
        ak8mcz4
        akxmjsq
        aso1ecq
        asx7t5o
        awm78yy
        b2773w23c
        bg257g44c
        bgd84t5
        blz0d1e
        brpy6h2
        by8cen9
        bzbzq6p
        c26dktp
        c5hekt0
        c5ri8xu
        c73lqd6
        c7jixao
        cf1g27k
        cn69m481p
        cnlv4q9
        dh8qks4
        dnel7ah
        dq1vcry
        drkkqip
        dwh6udb
        dxxyzo5
        dzskowv
        ej5g6p0
        f1881n22k
        f3vunv2
        fb494960s
        fc9f8sh
        fhiw36j
        fk7gfc7
        g4f4xrr
        gh93h0704
        gt54kn070
        guxwfm4
        gx41mk00h
        gzh9c7d
        h128nf12n
        h2wpjqz
        h415pb65x
        h702q750j
        hcxstvh
        hd76s099h
        hh63sw38v
        hhohjvz
        hm50ts226
        hm50ts49m
        hmo12zt
        hnaq4i6
        hqr0vep
        i3ntrqa
        if1y8jh
        ip5xqsz
        iqir275
        j098zb837
        j7eqggo
        j965jda
        jd472x647
        jq085m03h
        jswrvtg
        juzgqqv
        jxn9x1v
        k06988730
        k4424ph
        kh04dq16g
        kjzq5sx
        kw52j813n
        kw52j944v
        lfk1l7p
        li2hswd
        liut8yp
        m039k620t
        m326m2699
        mkicy9w
        mkmg3gr
        ml2d1fx
        ms35t945j
        ms35t9537
        mw22v6127
        ng451j669
        nk61zfx
        nlp95ls
        no6jc8u
        nqee13h
        oevh9ff
        ohfepv5
        oxczuzg
        p8xwtv7
        pg6ay55
        pk02cb51k
        pn89d780h
        ppze54g
        pr8v5c1
        px4ksww
        q811kk83q
        qc0geu2
        qjvy4hr
        qkbcb6u
        qn59q441g
        r08cysh
        r207tp471
        rcgynxk
        rcofrgk
        s28p4ir
        s7526c98r
        sffnt1m
        sieyazl
        sj139301v
        st74cr668
        t0xt4ls
        tb09j693r
        th83m023h
        torh4pp
        u1cnmj1
        u4pvoiq
        ukvpxbo
        uoeae9x
        ut99im9
        uwp76sh
        uye351l
        v405s9778
        v979v413n
        vh53ww37b
        vt150j93m
        vwn5gsm
        vzn1kaq
        w37637980
        wd375w35b
        we6qich
        whpeetf
        wkrtx7g
        wmlkiom
        wp988m09w
        ws3za03
        wsa6nnu
        wtl6m8z
        wxtwzjc
        x0sn5hu
        x633f153j
        x6rar31
        x70vejb
        x78peme
        xd07gs80t
        xd07gt25z
        xpej45y
        xw42n881j
        y11535o
        yasty4z
        yv8v2uh
        z029p529v
        z82jpz5
        zc3osyv
        zugqcua
      ]

      # A friendlier_id passed as the task argument replaces the whole batch.
      items_needing_ocr = [args[:work_friendlier_id]] if args[:work_friendlier_id].present?

      progress_bar = ProgressBar.create(
        total: items_needing_ocr.count,
        format: Kithe::STANDARD_PROGRESS_BAR_FORMAT
      )
      ocr_enqueued_count = 0

      # NOTE(review): presumably batches the index updates triggered by the
      # save! calls inside the block -- confirm against Kithe::Indexable docs.
      Kithe::Indexable.index_with(batching: true) do
        items_needing_ocr.each do |work_id|
          work = Work.find_by_friendlier_id(work_id)

          if work.nil?
            progress_bar.log("Couldn't find work #{work_id} ")
            # Missing works still count toward the total; without this the bar
            # never reaches 100% when any id cannot be found.
            progress_bar.increment
            next
          end

          if work.ocr_requested?
            progress_bar.log("SKIPPING: Already turned on for #{work_id} ")
            progress_bar.increment
            next
          end

          work.ocr_requested = true
          work.save!
          WorkOcrCreatorRemoverJob.set(queue: "special_jobs").perform_later(work)
          puts "\n OCR enqueued for #{work_id} ."
          ocr_enqueued_count += 1
          progress_bar.increment
        end
      end

      puts "\n OCR enqueued for #{ocr_enqueued_count} works"
    end
  end
end
You can’t perform that action at this time.
0 commit comments