3
3
# Source directories used by the C and Java rules.
c_src_dir = src_c
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext

# Python interpreter used by the Python rules; `?=` keeps any value already
# supplied by the environment or the command line.
python ?= python3
# Directory that receives the generated Python stemmer modules.
python_output_dir = python_out
# Sub-directories created below src/ in the Python distribution tree.
python_runtime_dir = snowballstemmer
python_sample_dir = sample
6
10
7
11
libstemmer_algorithms = danish dutch english finnish french german hungarian \
8
12
italian \
@@ -23,13 +27,16 @@ COMPILER_SOURCES = compiler/space.c \
23
27
compiler/analyser.c \
24
28
compiler/generator.c \
25
29
compiler/driver.c \
26
- compiler/generator_java.c
30
+ compiler/generator_java.c \
31
+ compiler/generator_python.c
32
+
27
33
# Header files that live next to the compiler sources in compiler/.
COMPILER_HEADERS = compiler/header.h \
                   compiler/syswords.h \
                   compiler/syswords2.h

# C sources and headers found in runtime/.
RUNTIME_SOURCES = runtime/api.c \
                  runtime/utilities.c

RUNTIME_HEADERS = runtime/api.h \
                  runtime/header.h
35
42
@@ -38,13 +45,24 @@ JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
38
45
java/org/tartarus/snowball/SnowballStemmer.java \
39
46
java/org/tartarus/snowball/TestApp.java
40
47
48
# Hand-written Python modules copied next to the generated stemmers.
PYTHON_RUNTIME_SOURCES = python/snowballstemmer/basestemmer.py \
                         python/snowballstemmer/among.py

# Example scripts copied into the sample directory of the distribution.
PYTHON_SAMPLE_SOURCES = python/testapp.py \
                        python/stemwords.py

# Packaging files copied to the top level of the distribution tree.
PYTHON_PACKAGE_FILES = python/MANIFEST.in \
                       python/setup.py

56
+
41
57
# File lists for the libstemmer wrapper library.
LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

# Source of the C stemwords example program.
STEMWORDS_SOURCES = examples/stemwords.c

# Python stemwords script that check_python_stemwords copies into python_check/.
PYTHON_STEMWORDS_SOURCE = python/stemwords.py

65
+
48
66
# One glob pattern per algorithm. The `*` is stored literally here; it is only
# expanded where make or the shell performs wildcard expansion on the value.
ALL_ALGORITHM_FILES = $(all_algorithms:%=algorithms/%/stem*.sbl)
49
67
C_LIB_SOURCES = $(libstemmer_algorithms:%=$(c_src_dir ) /stem_UTF_8_%.c ) \
50
68
$(KOI8_R_algorithms:%=$(c_src_dir ) /stem_KOI8_R_%.c ) \
@@ -57,6 +75,8 @@ C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
57
75
# Generated UTF-8 C sources/headers for the algorithms in $(other_algorithms).
C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
# Generated Java stemmer classes, one per libstemmer algorithm.
JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)
# Generated Python stemmer modules plus the package __init__.py.
PYTHON_SOURCES = $(libstemmer_algorithms:%=$(python_output_dir)/%_stemmer.py) \
                 $(python_output_dir)/__init__.py

# Object files derived from the C source lists (.c -> .o).
COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
@@ -83,10 +103,12 @@ clean:
83
103
$(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
84
104
$(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
85
105
$(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
106
+ $(PYTHON_SOURCES) \
86
107
libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
87
108
libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
88
109
rm -rf dist
89
110
rmdir $(c_src_dir) || true
111
+ rmdir $(python_output_dir) || true
90
112
91
113
# Link the snowball compiler executable from all compiler object files.
snowball: $(COMPILER_OBJECTS)
	$(CC) -o $@ $^
@@ -154,12 +176,23 @@ $(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
154
176
echo " ./snowball $< -j -o $$ {o} -p \" org.tartarus.snowball.SnowballStemmer\" -eprefix $$ {l}_ -r ../runtime -n $$ {l}Stemmer" ; \
155
177
./snowball $< -j -o $$ {o} -p " org.tartarus.snowball.SnowballStemmer" -eprefix $$ {l}_ -r ../runtime -n $$ {l}Stemmer
156
178
179
# Generate one Python stemmer module from an algorithm's stem_Unicode.sbl.
#   l  language name: the directory that contains the .sbl file
#   o  output path handed to snowball via -o (no file extension)
#   n  class name: the title-cased language name followed by "Stemmer"
# The echoed line is built from the same variables as the command that runs,
# so the log always shows the real invocation (including -p "BaseStemmer").
$(python_output_dir)/%_stemmer.py: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(python_output_dir)
	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
	o="$(python_output_dir)/$${l}_stemmer"; \
	n=`$(python) -c "print('$${l}'.title())"`Stemmer; \
	echo "./snowball $< -py -o $${o} -p \"BaseStemmer\" -eprefix $${l}_ -r ../runtime -n $${n}"; \
	./snowball $< -py -o $${o} -p "BaseStemmer" -eprefix $${l}_ -r ../runtime -n $${n}

185
+
186
# Create the package __init__.py by running python/create_init.py on the
# output directory. The script is a prerequisite so that editing it
# regenerates the file.
$(python_output_dir)/__init__.py: python/create_init.py
	@mkdir -p $(python_output_dir)
	$(python) python/create_init.py $(python_output_dir)
189
+
157
190
# Run splint over the compiler sources and keep its report in snowball.splint.
splint: snowball.splint
snowball.splint: $(COMPILER_SOURCES)
	splint $^ >$@ -weak
160
193
161
194
# Make a full source distribution.
# `dist` is also the name of the output directory used by the dist_* recipes,
# so the target is declared phony to keep it independent of that directory.
.PHONY: dist
dist: dist_snowball dist_libstemmer_c dist_libstemmer_java dist_libstemmer_python
163
196
164
197
# Make a distribution of all the sources involved in snowball
165
198
dist_snowball : $(COMPILER_SOURCES ) $(COMPILER_HEADERS ) \
@@ -250,6 +283,23 @@ dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
250
283
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
251
284
rm -rf $${dest}
252
285
286
# Build the Python source distribution.
# A staging tree dist/snowballstemmer is filled with the README, the generated
# and hand-written modules, the sample scripts and the packaging files; then
# `setup.py sdist` runs inside it, the resulting *.tar.gz is copied up into
# dist/, and the staging tree is removed.
# Every file the recipe copies is listed as a prerequisite so that a missing
# input is reported by make before anything is deleted.
.PHONY: dist_libstemmer_python
dist_libstemmer_python: $(PYTHON_SOURCES) $(PYTHON_RUNTIME_SOURCES) \
                        $(PYTHON_SAMPLE_SOURCES) $(PYTHON_PACKAGE_FILES) \
                        doc/libstemmer_python_README
	destname=snowballstemmer; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	mkdir -p $${dest} && \
	mkdir -p $${dest}/src/$(python_runtime_dir) && \
	mkdir -p $${dest}/src/$(python_sample_dir) && \
	cp doc/libstemmer_python_README $${dest}/README.rst && \
	cp -a $(PYTHON_SOURCES) $${dest}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_SAMPLE_SOURCES) $${dest}/src/$(python_sample_dir) && \
	cp -a $(PYTHON_RUNTIME_SOURCES) $${dest}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_PACKAGE_FILES) $${dest} && \
	(cd $${dest} && $(python) setup.py sdist && cp dist/*.tar.gz ..) && \
	rm -rf $${dest}

302
+
253
303
# Aggregate test targets; they name commands, not files.
.PHONY: check check_utf8
check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r

check_utf8: $(libstemmer_algorithms:%=check_utf8_%)
@@ -293,3 +343,19 @@ check_koi8r_%: $(STEMMING_DATA)/% stemwords
293
343
@python -c ' print(open("$</output.txt").read().decode("utf8").encode("koi8_r"))' | \
294
344
diff -u - tmp.txt
295
345
@rm tmp.txt
346
+
347
# Run the Python stemmers against the reference vocabulary of every
# libstemmer algorithm.
.PHONY: check_python
check_python: check_python_stemwords $(libstemmer_algorithms:%=check_python_%)

# Check one language: stem $(STEMMING_DATA)/<lang>/voc.txt with the staged
# stemwords.py and diff the result against output.txt.
#  - $* is the language name (the pattern stem).
#  - check_python_stemwords is an order-only prerequisite so the python_check
#    tree exists even when a single language is requested or make runs with -j.
#  - The temporary file is named per language so parallel checks do not
#    overwrite each other's output.
# NOTE(review): `../$<` only resolves if STEMMING_DATA is a relative path;
# confirm against its definition.
check_python_%: $(STEMMING_DATA)/% | check_python_stemwords
	@echo "Checking output of $* stemmer with UTF-8"
	(cd python_check && \
	$(python) stemwords.py -c utf8 -l $* -i ../$</voc.txt -o tmp_$*.txt && \
	diff -u ../$</output.txt tmp_$*.txt && \
	rm tmp_$*.txt)
355
+
356
# Stage everything the Python checks need under python_check/: the
# hand-written and generated modules go into python_check/snowballstemmer and
# the stemwords script is placed next to that directory.
# All copied files are prerequisites; `mkdir -p` on the inner directory also
# creates python_check itself.
.PHONY: check_python_stemwords
check_python_stemwords: $(PYTHON_STEMWORDS_SOURCE) $(PYTHON_RUNTIME_SOURCES) $(PYTHON_SOURCES)
	mkdir -p python_check/snowballstemmer && \
	cp -a $(PYTHON_RUNTIME_SOURCES) python_check/snowballstemmer && \
	cp -a $(PYTHON_SOURCES) python_check/snowballstemmer && \
	cp -a $(PYTHON_STEMWORDS_SOURCE) python_check/
0 commit comments