3
3
# Directory that receives the generated C stemmer sources and headers.
c_src_dir = src_c

# Java runtime package directory, and the sub-package that receives the
# generated *Stemmer.java files.
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext

# Python: directory for generated *_stemmer.py modules, plus the runtime and
# sample sub-directory names used when laying out the Python distribution.
python_output_dir = python_out
python_runtime_dir = snowballstemmer
python_sample_dir = sample

# JSX: directory for generated *-stemmer.jsx files, the directory searched for
# the hand-written runtime, and the runtime/sample sub-directory names used
# when laying out the JSX distribution.
jsx_output_dir = jsx_out
jsx_runtime_src_dir = jsx
jsx_runtime_dir = lib
jsx_sample_dir = sample
6
13
7
14
libstemmer_algorithms = danish dutch english finnish french german hungarian \
8
15
italian \
@@ -23,13 +30,17 @@ COMPILER_SOURCES = compiler/space.c \
23
30
compiler/analyser.c \
24
31
compiler/generator.c \
25
32
compiler/driver.c \
26
- compiler/generator_java.c
33
+ compiler/generator_java.c \
34
+ compiler/generator_python.c \
35
+ compiler/generator_jsx.c
36
+
27
37
# Header files under compiler/.
COMPILER_HEADERS = \
  compiler/header.h \
  compiler/syswords.h \
  compiler/syswords2.h

# C sources under runtime/.
RUNTIME_SOURCES = \
  runtime/api.c \
  runtime/utilities.c

# Header files under runtime/.
RUNTIME_HEADERS = \
  runtime/api.h \
  runtime/header.h
35
46
@@ -38,13 +49,33 @@ JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
38
49
java/org/tartarus/snowball/SnowballStemmer.java \
39
50
java/org/tartarus/snowball/TestApp.java
40
51
52
# Hand-written JSX files copied into the runtime directory of the JSX
# distribution.
JSX_RUNTIME_SOURCES = \
  jsx/among.jsx \
  jsx/base-stemmer.jsx \
  jsx/stemmer.jsx

# JSX files copied into the sample directory of the JSX distribution.
JSX_SAMPLE_SOURCES = \
  jsx/testapp.jsx \
  jsx/stemwords.jsx

# Hand-written Python modules copied next to the generated stemmers.
PYTHON_RUNTIME_SOURCES = \
  python/snowballstemmer/basestemmer.py \
  python/snowballstemmer/among.py

# Python scripts copied into the sample directory of the Python distribution.
PYTHON_SAMPLE_SOURCES = \
  python/testapp.py \
  python/stemwords.py

# Packaging files copied to the top level of the Python distribution.
PYTHON_PACKAGE_FILES = \
  python/MANIFEST.in \
  python/setup.py
67
+
41
68
# libstemmer file lists.
LIBSTEMMER_SOURCES      = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = \
  include/libstemmer.h \
  libstemmer/modules.h \
  libstemmer/modules_utf8.h
LIBSTEMMER_EXTRA = \
  libstemmer/modules.txt \
  libstemmer/modules_utf8.txt \
  libstemmer/libstemmer_c.in

# Source of the C stemwords example program.
STEMWORDS_SOURCES = examples/stemwords.c

# Entry-point script compiled into the jsx_stemwords executable.
JSX_STEMWORDS_SOURCE = jsx/stemwords.jsx

# Script copied into python_check/ and run by the check_python_% targets.
PYTHON_STEMWORDS_SOURCE = python/stemwords.py
78
+
48
79
# One glob pattern per algorithm, matching every stem*.sbl file in its directory.
ALL_ALGORITHM_FILES = $(all_algorithms:%=algorithms/%/stem*.sbl)
49
80
C_LIB_SOURCES = $(libstemmer_algorithms:%=$(c_src_dir ) /stem_UTF_8_%.c ) \
50
81
$(KOI8_R_algorithms:%=$(c_src_dir ) /stem_KOI8_R_%.c ) \
@@ -57,6 +88,9 @@ C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
57
88
# Generated-file lists, derived from the algorithm name lists.
# C sources/headers for the algorithms listed in other_algorithms.
C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
# One generated Java class per libstemmer algorithm.
JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)
# One generated Python module per libstemmer algorithm, plus the package
# __init__.py produced by python/create_init.py.
PYTHON_SOURCES = $(libstemmer_algorithms:%=$(python_output_dir)/%_stemmer.py) \
                 $(python_output_dir)/__init__.py
# One generated JSX file per libstemmer algorithm.
JSX_SOURCES = $(libstemmer_algorithms:%=$(jsx_output_dir)/%-stemmer.jsx)
60
94
61
95
# Object files corresponding to the compiler and runtime C sources
# (each .c suffix replaced by .o).
COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
@@ -83,10 +117,14 @@ clean:
83
117
$(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
84
118
$(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
85
119
$(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
120
+ $(PYTHON_SOURCES) \
121
+ $(JSX_SOURCES) jsx_stemwords \
86
122
libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
87
123
libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
88
124
rm -rf dist
89
125
rmdir $(c_src_dir) || true
126
+ rmdir $(python_output_dir) || true
127
+ rmdir $(jsx_output_dir) || true
90
128
91
129
# Link the compiler object files into the snowball executable.
snowball: $(COMPILER_OBJECTS)
	$(CC) -o $@ $^
@@ -113,6 +151,9 @@ libstemmer.o: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS)
113
151
# Link the stemwords example objects against the combined libstemmer.o.
stemwords: $(STEMWORDS_OBJECTS) libstemmer.o
	$(CC) -o $@ $^
115
153
154
# Compile the JSX stemwords script into a node executable.
# The generated stemmers are found through $(jsx_output_dir) and the
# hand-written runtime through $(jsx_runtime_src_dir); the runtime sources are
# listed as prerequisites so that editing them rebuilds the executable
# (presumably they are what the search path resolves to -- confirm against
# jsx/stemwords.jsx).
jsx_stemwords: $(JSX_STEMWORDS_SOURCE) $(JSX_SOURCES) $(JSX_RUNTIME_SOURCES)
	jsx --executable node --output $@ --add-search-path $(jsx_output_dir) --add-search-path $(jsx_runtime_src_dir) $(JSX_STEMWORDS_SOURCE)
156
+
116
157
# An algorithm's stem_Unicode.sbl can be produced by copying its
# stem_ISO_8859_1.sbl.
algorithms/%/stem_Unicode.sbl: algorithms/%/stem_ISO_8859_1.sbl
	cp $^ $@
118
159
@@ -154,12 +195,30 @@ $(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
154
195
echo " ./snowball $< -j -o $$ {o} -p \" org.tartarus.snowball.SnowballStemmer\" -eprefix $$ {l}_ -r ../runtime -n $$ {l}Stemmer" ; \
155
196
./snowball $< -j -o $$ {o} -p " org.tartarus.snowball.SnowballStemmer" -eprefix $$ {l}_ -r ../runtime -n $$ {l}Stemmer
156
197
198
# Generate the Python stemmer module for one algorithm with the snowball
# compiler.  $* is the algorithm name (the pattern stem).  The class name is
# the title-cased algorithm name followed by "Stemmer"; it is computed once so
# that the echoed command line is exactly the command that is run (the echo
# previously advertised -p "SnowballStemmer" while -p "BaseStemmer" was used).
$(python_output_dir)/%_stemmer.py: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(python_output_dir)
	@l="$*" && \
	o="$(python_output_dir)/$${l}_stemmer" && \
	n=`python -c "print('$${l}'.title())"`Stemmer && \
	echo "./snowball $< -py -o $${o} -p \"BaseStemmer\" -eprefix $${l}_ -r ../runtime -n $${n}" && \
	./snowball $< -py -o $${o} -p "BaseStemmer" -eprefix $${l}_ -r ../runtime -n $${n}
204
+
205
# Create the package __init__.py by running python/create_init.py on the
# output directory.  The script is a prerequisite so that changing it
# regenerates the file.
$(python_output_dir)/__init__.py: python/create_init.py
	@mkdir -p $(python_output_dir)
	python python/create_init.py $(python_output_dir)
208
+
209
# Generate the JSX stemmer for one algorithm with the snowball compiler.
# $* is the algorithm name (the pattern stem).  The class name is the
# title-cased algorithm name followed by "Stemmer"; it is computed once so
# that the echoed command line is exactly the command that is run (the echo
# previously advertised -p "SnowballStemmer" while -p "BaseStemmer" was used).
$(jsx_output_dir)/%-stemmer.jsx: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(jsx_output_dir)
	@l="$*" && \
	o="$(jsx_output_dir)/$${l}-stemmer" && \
	n=`python -c "print('$${l}'.title())"`Stemmer && \
	echo "./snowball $< -jsx -o $${o} -p \"BaseStemmer\" -eprefix $${l}_ -r ../runtime -n $${n}" && \
	./snowball $< -jsx -o $${o} -p "BaseStemmer" -eprefix $${l}_ -r ../runtime -n $${n}
215
+
157
216
# Run splint over the compiler sources, capturing its report in
# snowball.splint.  `splint` itself is only an alias, so it is declared phony.
.PHONY: splint
splint: snowball.splint
snowball.splint: $(COMPILER_SOURCES)
	splint $^ >$@ -weak
160
219
161
220
# Make a full source distribution.
# The dist_* recipes in this file create a directory that is also named
# "dist", so the target is declared phony to stop that directory from ever
# being treated as this target's output.
.PHONY: dist
dist: dist_snowball dist_libstemmer_c dist_libstemmer_java dist_libstemmer_jsx dist_libstemmer_python
163
222
164
223
# Make a distribution of all the sources involved in snowball
165
224
dist_snowball : $(COMPILER_SOURCES ) $(COMPILER_HEADERS ) \
@@ -250,6 +309,42 @@ dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
250
309
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
251
310
rm -rf $${dest}
252
311
312
# Build the Python source distribution.
# A staging tree is assembled under dist/snowballstemmer (README, generated
# and hand-written modules, samples, packaging files), `python setup.py sdist`
# is run inside it, the resulting .tar.gz is copied up into dist/, and the
# staging tree is removed.  Every file list the recipe copies is also a
# prerequisite, so a missing input is reported by make before staging starts.
dist_libstemmer_python: $(PYTHON_SOURCES) $(PYTHON_RUNTIME_SOURCES) \
                        $(PYTHON_SAMPLE_SOURCES) $(PYTHON_PACKAGE_FILES)
	destname=snowballstemmer; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	mkdir -p $${dest} && \
	mkdir -p $${dest}/src/$(python_runtime_dir) && \
	mkdir -p $${dest}/src/$(python_sample_dir) && \
	cp doc/libstemmer_python_README $${dest}/README.rst && \
	cp -a $(PYTHON_SOURCES) $${dest}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_SAMPLE_SOURCES) $${dest}/src/$(python_sample_dir) && \
	cp -a $(PYTHON_RUNTIME_SOURCES) $${dest}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_PACKAGE_FILES) $${dest} && \
	(cd $${dest} && python setup.py sdist && cp dist/*.tar.gz ..) && \
	rm -rf $${dest}
328
+
329
# Build the JSX source distribution as dist/jsxstemmer.tgz.
# A staging tree is assembled under dist/jsxstemmer (README, runtime and
# generated stemmers in $(jsx_runtime_dir), samples in $(jsx_sample_dir)),
# a MANIFEST listing those files is written, the tree is archived, and the
# staging tree is removed.  Every file list the recipe copies is also a
# prerequisite, so a missing input is reported by make before staging starts.
dist_libstemmer_jsx: $(JSX_SOURCES) $(JSX_RUNTIME_SOURCES) $(JSX_SAMPLE_SOURCES)
	destname=jsxstemmer; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	mkdir -p $${dest} && \
	mkdir -p $${dest}/$(jsx_runtime_dir) && \
	mkdir -p $${dest}/$(jsx_sample_dir) && \
	cp -a doc/libstemmer_jsx_README $${dest}/README && \
	cp -a $(JSX_RUNTIME_SOURCES) $${dest}/$(jsx_runtime_dir) && \
	cp -a $(JSX_SAMPLE_SOURCES) $${dest}/$(jsx_sample_dir) && \
	cp -a $(JSX_SOURCES) $${dest}/$(jsx_runtime_dir) && \
	(cd $${dest} && \
	 echo "README" >> MANIFEST && \
	 ls $(jsx_runtime_dir)/*.jsx >> MANIFEST && \
	 ls $(jsx_sample_dir)/*.jsx >> MANIFEST) && \
	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
	rm -rf $${dest}
347
+
253
348
# Aggregate test targets: `check` runs the checks for every supported
# encoding; `check_utf8` expands to one check_utf8_<algorithm> per libstemmer
# algorithm.  Neither names a file, so both are declared phony.
.PHONY: check check_utf8
check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r

check_utf8: $(libstemmer_algorithms:%=check_utf8_%)
@@ -277,24 +372,48 @@ check_utf8_%: $(STEMMING_DATA)/% stemwords
277
372
278
373
# Check one algorithm's stemwords output in ISO-8859-1.
# $< is that algorithm's directory under $(STEMMING_DATA): its UTF-8 voc.txt
# is converted with iconv and piped through stemwords, and the result is
# diffed against the converted output.txt.  The scratch file name includes the
# algorithm ($*) so that checks running in parallel (make -j) do not overwrite
# each other's output.
check_iso_8859_1_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of `echo $< | sed 's!.*/!!'` stemmer with ISO_8859_1"
	@iconv -fUTF8 -tISO8859-1 $</voc.txt | \
	    ./stemwords -c ISO_8859_1 -l `echo $< | sed 's!.*/!!'` -o tmp_iso_8859_1_$*.txt
	@iconv -fUTF8 -tISO8859-1 $</output.txt | \
	    diff -u - tmp_iso_8859_1_$*.txt
	@rm tmp_iso_8859_1_$*.txt
285
380
286
381
# Check one algorithm's stemwords output in ISO-8859-2.
# $< is that algorithm's directory under $(STEMMING_DATA): its UTF-8 voc.txt
# is converted with iconv and piped through stemwords, and the result is
# diffed against the converted output.txt.  The scratch file name includes the
# algorithm ($*) so that checks running in parallel (make -j) do not overwrite
# each other's output.
check_iso_8859_2_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of `echo $< | sed 's!.*/!!'` stemmer with ISO_8859_2"
	@iconv -fUTF8 -tISO8859-2 $</voc.txt | \
	    ./stemwords -c ISO_8859_2 -l `echo $< | sed 's!.*/!!'` -o tmp_iso_8859_2_$*.txt
	@iconv -fUTF8 -tISO8859-2 $</output.txt | \
	    diff -u - tmp_iso_8859_2_$*.txt
	@rm tmp_iso_8859_2_$*.txt
293
388
294
389
# Check one algorithm's stemwords output in KOI8-R.
# $< is that algorithm's directory under $(STEMMING_DATA): its UTF-8 voc.txt
# is converted with iconv and piped through stemwords, and the result is
# diffed against the converted output.txt.  The scratch file name includes the
# algorithm ($*) so that checks running in parallel (make -j) do not overwrite
# each other's output.
check_koi8r_%: $(STEMMING_DATA)/% stemwords
	@echo "Checking output of `echo $< | sed 's!.*/!!'` stemmer with KOI8R"
	@iconv -fUTF8 -tKOI8R $</voc.txt | \
	    ./stemwords -c KOI8_R -l `echo $< | sed 's!.*/!!'` -o tmp_koi8r_$*.txt
	@iconv -fUTF8 -tKOI8R $</output.txt | \
	    diff -u - tmp_koi8r_$*.txt
	@rm tmp_koi8r_$*.txt
396
+
397
# Run the JSX stemmer check for every libstemmer algorithm.
.PHONY: check_jsx
check_jsx: $(libstemmer_algorithms:%=check_jsx_%)

# Check one algorithm with the compiled jsx_stemwords executable: stem the
# algorithm's voc.txt and diff the result against its output.txt.  The scratch
# file name includes the algorithm ($*) so that checks running in parallel
# (make -j) do not overwrite each other's output.
check_jsx_%: $(STEMMING_DATA)/% jsx_stemwords
	@echo "Checking output of `echo $< | sed 's!.*/!!'` stemmer with UTF-8"
	@./jsx_stemwords -c utf8 -l `echo $< | sed 's!.*/!!'` -i $</voc.txt -o tmp_jsx_$*.txt
	@diff -u $</output.txt tmp_jsx_$*.txt
	@rm tmp_jsx_$*.txt
404
+
405
# Run the Python stemmer check for every libstemmer algorithm.
.PHONY: check_python
check_python: check_python_stemwords $(libstemmer_algorithms:%=check_python_%)

# Check one algorithm with python_check/stemwords.py: stem the algorithm's
# voc.txt and diff the result against its output.txt.
# check_python_stemwords populates python_check/, so it is an order-only
# prerequisite: it must have run first (also under make -j, or when a single
# check_python_<algorithm> goal is requested), while $< stays the data
# directory.  The data paths are prefixed with ../ because the commands run
# inside python_check/, which assumes $(STEMMING_DATA) is a relative path.
check_python_%: $(STEMMING_DATA)/% | check_python_stemwords
	@echo "Checking output of `echo $< | sed 's!.*/!!'` stemmer with UTF-8"
	(cd python_check && \
	python stemwords.py -c utf8 -l `echo $< | sed 's!.*/!!'` -i ../$</voc.txt -o tmp_$*.txt && \
	diff -u ../$</output.txt tmp_$*.txt && \
	rm tmp_$*.txt)
413
+
414
# Assemble the python_check/ scratch tree used by the check_python_% targets:
# the hand-written runtime and the generated stemmers go into
# python_check/snowballstemmer, and the stemwords script sits beside that
# directory.  The runtime sources are copied by the recipe, so they are listed
# as prerequisites too.
check_python_stemwords: $(PYTHON_STEMWORDS_SOURCE) $(PYTHON_SOURCES) $(PYTHON_RUNTIME_SOURCES)
	mkdir -p python_check && \
	mkdir -p python_check/snowballstemmer && \
	cp -a $(PYTHON_RUNTIME_SOURCES) python_check/snowballstemmer && \
	cp -a $(PYTHON_SOURCES) python_check/snowballstemmer && \
	cp -a $(PYTHON_STEMWORDS_SOURCE) python_check/
0 commit comments