@@ -108,6 +108,19 @@ exit $?
108
108
109
109
** Configure instances
110
110
111
+ *** Enumerate Doubly True
112
+
113
+ #+BEGIN_SRC sh
114
+ generateDoublyTrue 401 500
115
+ #+END_SRC
116
+
117
+
118
+ Interrupted processes
119
+ - pt-301-400
120
+ - el-301-400
121
+ - ro-301-400
122
+ - el-401-500
123
+
111
124
*** Enumerate all cryptarithms
112
125
113
126
The word lists ~colors.txt~ and ~monsters.txt~ take the most time by far (around 110K candidates).
@@ -120,6 +133,7 @@ The word lists ~colors.txt~ and ~monsters.txt~ takes the most time by far (aroun
120
133
done
121
134
#+END_SRC
122
135
136
+
123
137
*** Enumerate the longest cryptarithms
124
138
125
139
I have manually found the longest cryptarithms of the remaining word lists.
@@ -138,16 +152,79 @@ The word lists ~colors.txt~ and ~monsters.txt~ takes the most time by far (aroun
138
152
generateLong 16 capitales.txt
139
153
#+END_SRC
140
154
155
+
156
+
157
+ *** Enumerate samples
158
+
159
+ #+BEGIN_SRC sh
160
+ rm -fr samples
161
+ mkdir samples
162
+ (
163
+ cd samples
164
+ pwd
165
+ ../../R/sample-words.R -n 5 -w 80 --minlen 2 --maxlen 7 ../../words/liste.de.mots.francais.frgut.txt
166
+ )
167
+ #+END_SRC
168
+
169
+ #+BEGIN_SRC sh
170
+ function generateMaxLong() {
171
+ find samples/ -type f -print | while read PATHNAME ; do
172
+ INSTANCE="instances/"`basename -s .txt $PATHNAME`-$1-$2.dat
173
+ echo -minop $1 -maxop $2 $PATHNAME > $INSTANCE
174
+ done
175
+ }
176
+
177
+ min=35
178
+ step=2
179
+ max=46
180
+ while [ $min -lt $max ] ; do
181
+ generateMaxLong $min $((min+step-1))
182
+ min=$((min+step))
183
+ done
184
+ #+END_SRC
185
+
141
186
*** Generate languages
142
187
143
188
#+BEGIN_SRC R :results output file :file "words-10-2.txt"
144
189
base <- 10
145
190
x <- head(letters, base)
146
- xy <- expand.grid(x = x , y = x)
191
+ xy <- expand.grid(x = head(x, 2) , y = x)
147
192
cat(paste0(x, "\n", collapse = ""))  # paste0: one word per line, without the blank that paste()'s default sep = " " put before each newline
148
193
cat(paste(xy$x, xy$y, '\n', sep = "", collapse = ''))
149
194
#+END_SRC
150
195
196
+ #+BEGIN_SRC R :results output file :file "words-10-2.txt"
197
+ base <- 10
198
+ x <- head(letters, base)
199
+ xy <- expand.grid(x = x, y = x)
200
+ xyz <- expand.grid(x = head(x, 1), y = tail(x, 2), z = x)
201
+ cat(paste0(x, "\n", collapse = ""))  # paste0: one word per line, without the blank that paste()'s default sep = " " put before each newline
202
+ cat(paste(xy$x, xy$y, '\n', sep = "", collapse = ''))
203
+ cat(paste(xyz$x, xyz$y, xyz$z,'\n', sep = "", collapse = ''))
204
+ #+END_SRC
205
+
206
+ *** Crossword
207
+
208
+ #+BEGIN_SRC sh
209
+ rm -fr samples
210
+ mkdir samples
211
+ (
212
+ cd samples
213
+ pwd
214
+ ../../R/sample-words.R -n 60 -w 20 --minlen 2 --maxlen 3 ../../words/liste.de.mots.francais.frgut.txt
215
+ )
216
+ #+END_SRC
217
+
218
+ #+BEGIN_SRC sh
219
+ function generateCrossword() {
220
+ find samples/ -type f -print | while read PATHNAME ; do
221
+ INSTANCE="instances/"`basename -s .txt $PATHNAME`-$1.dat
222
+ echo -grid $1 $PATHNAME > $INSTANCE
223
+ done
224
+ }
225
+
226
+ generateCrossword 3
227
+ #+END_SRC
151
228
152
229
** Configure the algorithm
153
230
@@ -158,30 +235,60 @@ The word lists ~colors.txt~ and ~monsters.txt~ takes the most time by far (aroun
158
235
159
236
* Solve
160
237
** Create the solver
238
+ *** Compare Models
239
+
161
240
162
241
#+BEGIN_SRC sh :tangle solver.sh
163
242
JAR=cryptator-0.6.0-SNAPSHOT-with-dependencies.jar
164
243
MAINCLASS=cryptator.Cryptator
165
-
166
244
MAINARGS=`cat $1 | xargs`
167
-
168
245
## exclude cryptarithm with long words
169
- ## grep -wvE '\w{10,}' $2 | sed 's/[[:space:]]//g' | xargs java -server -Xms512m -Xmx8192m -cp $JAR $MAINCLASS $MAINARGS
246
+ grep -wvE '\w{9,}' $2 | sed 's/[[:space:]]//g' | xargs java -server -Xms512m -Xmx8192m -cp $JAR $MAINCLASS $MAINARGS
247
+ exit $?
248
+ #+END_SRC
170
249
171
- ## exclude cryptarithm without long words
172
- ## grep -wE '\w{10,}' $2 | sed 's/[[:space:]]//g' | xargs java -server -Xms512m -Xmx8192m -cp $JAR $MAINCLASS $MAINARGS
250
+ *** Compare Solvers
173
251
252
+ #+BEGIN_SRC sh :tangle solver.sh
253
+ JAR=cryptator-0.6.0-SNAPSHOT-with-dependencies.jar
254
+ MAINCLASS=cryptator.Cryptator
255
+ MAINARGS=`cat $1 | xargs`
174
256
## Do not exclude anything
175
257
sed 's/[[:space:]]//g' $2 | xargs java -server -Xms512m -Xmx8192m -cp $JAR $MAINCLASS $MAINARGS
258
+
176
259
exit $?
177
260
#+END_SRC
178
261
179
- ** Configure instances
262
+ **** Crypt
180
263
181
- Select instance files in the directory ~instances-solve~ .
264
+ The very fast [[https://tamura70.gitlab.io/web-puzzle/cryptarithm/][crypt solver]], written in C by Naoyuki Tamura.
182
265
183
- - Filename prefixed by ~long~ requires the bignum model.
184
- - Filename prefixed by ~short~ are accepted by the scalar model.
266
+ Postprocess the output of the crypt solver.
267
+ #+BEGIN_SRC awk :tangle crypt.awk
268
+ #!/usr/bin/awk -f
269
+ {
270
+ if(NR % 3 == 2 && $_ !~ /Total time =/) {
271
+ printf "i%s\n",$_
272
+ } else if(NR % 3 == 1 && NR > 1) {
273
+ print "d NBSOLS",$1
274
+ print "d TIME",$3/1000
275
+ } else if(NR % 3 == 0) {
276
+ printf "s%s\n",$_
277
+ } else print $_
278
+ }
279
+ #+END_SRC
280
+
281
+ Encapsulate the solver for benchmarking
282
+ #+BEGIN_SRC sh :tangle solver.sh
283
+ #!/bin/sh
284
+ ./crypt/crypt < $2 | awk -f crypt.awk
285
+ #+END_SRC
286
+
287
+ ** Configure instances
288
+
289
+ #+BEGIN_SRC sh
290
+ ln -s ../cryptarithms instances
291
+ #+END_SRC
185
292
186
293
** Configure the algorithm
187
294
@@ -194,10 +301,10 @@ Select instance files in the directory ~instances-solve~.
194
301
echo $ARGS -l TRUE -search 1 > algorithms/BIGNUM-1.dat
195
302
echo $ARGS -l FALSE -h FALSE -search 1 > algorithms/SCALAR-1.dat
196
303
echo $ARGS -l FALSE -h TRUE -search 1 > algorithms/HORNER-1.dat
197
-
198
304
#+END_SRC
199
305
200
306
307
+
201
308
* Testing
202
309
203
310
Change the file extension accordingly.
@@ -259,14 +366,12 @@ Change the file extension accordingly.
259
366
260
367
#+BEGIN_SRC sh
261
368
INDIR=results-cryptarithms
262
- OUTDIR=results-classified
263
- rm -fr $OUTDIR
264
- mkdir $OUTDIR
369
+ OUTDIR=../cryptarithms/samples
370
+
265
371
## Classify
266
372
(
267
373
cd $OUTDIR
268
374
find ../$INDIR -name "*.db.txt" -exec awk -f ../classify-additions.awk {} \;
269
-
270
375
)
271
376
## Sort cryptarithms
272
377
find $OUTDIR -name '*.db.txt' -exec sort -u -o {} {} \;
@@ -275,4 +380,45 @@ Change the file extension accordingly.
275
380
276
381
277
382
383
+ #+BEGIN_SRC sh
384
+ OUTDIR=../cryptarithms/samples
385
+ TEMPFILE=`mktemp`
386
+ wc -l $OUTDIR/* | head -n -1 | while read LINECOUNT DBFILE ; do
387
+ if [ $LINECOUNT -gt 1000 ] ; then
388
+ head -n 1000 $DBFILE > $TEMPFILE
389
+ mv $TEMPFILE $DBFILE
390
+ fi
391
+ done
392
+
393
+ #+END_SRC
394
+
395
+
396
+
278
397
* Analysis
398
+
399
+ #+BEGIN_SRC R
400
+ dbfiles <- list.files("../cryptarithms", pattern = "\\.db\\.txt$", recursive = TRUE, full.names = TRUE)  # pattern is a regex, not a glob: anchor the literal ".db.txt" suffix
401
+ dbfiles <- subset(dbfiles, !grepl("external-contributions", dbfiles))
402
+ cryptarithms <- unlist(lapply(dbfiles, readLines), use.names = FALSE)  # lapply always yields a list; sapply returns a matrix when all files have the same line count (e.g. all capped at 1000), which breaks the subset() below
403
+ cryptarithms <- subset(cryptarithms, nchar(cryptarithms) > 0)
404
+ operands <- strsplit(cryptarithms, "[ +=]+")
405
+
406
+ ExtractFeatures <- function(x) {
407
+ y <- nchar(x)
408
+ r <- range(y)
409
+ z <- length(unique(unlist(strsplit(x, ""))))
410
+ c( words = length(x), letters = sum(y), symbols = z, meanLen = mean(y), medianLen = median(y), minLen = r[1], maxLen = r[2], diffLen = r[2] - r[1])
411
+ }
412
+
413
+ features <- as.data.frame(t(sapply(operands, ExtractFeatures)))
414
+ cat(nrow(features), "cryptarithms in database\n")
415
+ summary(features)
416
+
417
+ library(dplyr)
418
+ library(ggplot2)
419
+ x <- features %>% count(words, maxLen)
420
+ x$n <- log10(x$n)
421
+ ggplot(x, aes(words, maxLen, fill= n)) +
422
+ geom_tile()
423
+
424
+ #+END_SRC
0 commit comments