Skip to content

Add Python generator #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 7, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/snowball
/src_c
/stemwords
/dist
/python_out
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ before_install:
- git clone --depth=1 -b "$TRAVIS_BRANCH" https://github.com:"${TRAVIS_REPO_SLUG%%/*}"/snowball-data.git || git clone --depth=1 -b "$TRAVIS_BRANCH" https://github.com/snowballstem/snowball-data.git || git clone --depth=1 https://github.com/snowballstem/snowball-data.git
script:
- make && make check STEMMING_DATA=snowball-data
- make check_python python=python2.7 STEMMING_DATA=snowball-data
69 changes: 67 additions & 2 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
c_src_dir = src_c
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext
# Python interpreter used by the Python targets.  Assigned with ?= so it can
# be overridden from the command line or environment, e.g. python=python2.7.
python ?= python3
# Directory that receives the generated *_stemmer.py modules and __init__.py.
python_output_dir = python_out
# Sub-directory of src/ in the distribution tree that holds the runtime
# modules and the generated stemmers.
python_runtime_dir = snowballstemmer
# Sub-directory of src/ in the distribution tree that holds the sample scripts.
python_sample_dir = sample

libstemmer_algorithms = danish dutch english finnish french german hungarian \
italian \
Expand All @@ -23,7 +27,9 @@ COMPILER_SOURCES = compiler/space.c \
compiler/analyser.c \
compiler/generator.c \
compiler/driver.c \
compiler/generator_java.c
compiler/generator_java.c \
compiler/generator_python.c

COMPILER_HEADERS = compiler/header.h \
compiler/syswords.h \
compiler/syswords2.h
Expand All @@ -38,13 +44,24 @@ JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
java/org/tartarus/snowball/SnowballStemmer.java \
java/org/tartarus/snowball/TestApp.java

# Hand-written Python runtime modules that are copied next to the generated
# stemmers (both in the distribution tree and in the python_check test tree).
PYTHON_RUNTIME_SOURCES = python/snowballstemmer/basestemmer.py \
python/snowballstemmer/among.py

# Sample scripts copied into the sample directory of the distribution.
PYTHON_SAMPLE_SOURCES = python/testapp.py \
python/stemwords.py

# Packaging files copied to the top level of the distribution tree.
PYTHON_PACKAGE_FILES = python/MANIFEST.in \
python/setup.py

LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

STEMWORDS_SOURCES = examples/stemwords.c

# Script used by the check_python_% tests to run a stemmer over a word list.
# The same file is also listed in PYTHON_SAMPLE_SOURCES.
PYTHON_STEMWORDS_SOURCE = python/stemwords.py

ALL_ALGORITHM_FILES = $(all_algorithms:%=algorithms/%/stem*.sbl)
C_LIB_SOURCES = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c) \
$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.c) \
Expand All @@ -57,6 +74,8 @@ C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)
# Generated Python files: one <algorithm>_stemmer.py per libstemmer algorithm
# plus the __init__.py, all under $(python_output_dir).
PYTHON_SOURCES = $(libstemmer_algorithms:%=$(python_output_dir)/%_stemmer.py) \
$(python_output_dir)/__init__.py

COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
Expand All @@ -83,10 +102,12 @@ clean:
$(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
$(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
$(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
$(PYTHON_SOURCES) \
libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
rm -rf dist
rmdir $(c_src_dir) || true
rmdir $(python_output_dir) || true

snowball: $(COMPILER_OBJECTS)
$(CC) -o $@ $^
Expand Down Expand Up @@ -154,12 +175,23 @@ $(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
echo "./snowball $< -j -o $${o} -p \"org.tartarus.snowball.SnowballStemmer\" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer"; \
./snowball $< -j -o $${o} -p "org.tartarus.snowball.SnowballStemmer" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer

# Generate the Python stemmer module for one algorithm from its Unicode
# Snowball source.
#   l - language name, taken from the directory that holds stem_Unicode.sbl
#   o - output path without the .py extension
#   n - class name: the language name title-cased, followed by "Stemmer"
# The command is echoed by hand (the recipe itself is silenced with @), so the
# echoed text must be kept identical to the command that is actually run.
$(python_output_dir)/%_stemmer.py: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(python_output_dir)
	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
	o="$(python_output_dir)/$${l}_stemmer"; \
	n=`$(python) -c "print('$${l}'.title())"`Stemmer; \
	echo "./snowball $< -py -o $${o} -p \"BaseStemmer\" -eprefix $${l}_ -r ../runtime -n $${n}"; \
	./snowball $< -py -o $${o} -p "BaseStemmer" -eprefix $${l}_ -r ../runtime -n $${n}

# Create __init__.py in the Python output directory by running
# python/create_init.py on that directory.  The script is listed as a
# prerequisite so that __init__.py is regenerated whenever the script changes.
$(python_output_dir)/__init__.py: python/create_init.py
	@mkdir -p $(python_output_dir)
	$(python) python/create_init.py $(python_output_dir)

splint: snowball.splint
snowball.splint: $(COMPILER_SOURCES)
splint $^ >$@ -weak

# Make a full source distribution
dist: dist_snowball dist_libstemmer_c dist_libstemmer_java
dist: dist_snowball dist_libstemmer_c dist_libstemmer_java dist_libstemmer_python

# Make a distribution of all the sources involved in snowball
dist_snowball: $(COMPILER_SOURCES) $(COMPILER_HEADERS) \
Expand Down Expand Up @@ -250,6 +282,23 @@ dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
rm -rf $${dest}

# Build the Python source distribution.
# Stages the README, the generated stemmers, the runtime modules, the sample
# scripts and the packaging files under dist/snowballstemmer, runs
# "setup.py sdist" in that tree, copies the resulting *.tar.gz up into dist/,
# and finally removes the staging tree.
# Every file copied by the recipe is declared as a prerequisite so that a
# missing input is reported by make before any staging work is done.
dist_libstemmer_python: $(PYTHON_SOURCES) $(PYTHON_RUNTIME_SOURCES) \
    $(PYTHON_SAMPLE_SOURCES) $(PYTHON_PACKAGE_FILES) \
    doc/libstemmer_python_README
	destname=snowballstemmer; \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}.tgz && \
	mkdir -p $${dest} && \
	mkdir -p $${dest}/src/$(python_runtime_dir) && \
	mkdir -p $${dest}/src/$(python_sample_dir) && \
	cp doc/libstemmer_python_README $${dest}/README.rst && \
	cp -a $(PYTHON_SOURCES) $${dest}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_SAMPLE_SOURCES) $${dest}/src/$(python_sample_dir) && \
	cp -a $(PYTHON_RUNTIME_SOURCES) $${dest}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_PACKAGE_FILES) $${dest} && \
	(cd $${dest} && $(python) setup.py sdist && cp dist/*.tar.gz ..) && \
	rm -rf $${dest}

check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r

check_utf8: $(libstemmer_algorithms:%=check_utf8_%)
Expand Down Expand Up @@ -293,3 +342,19 @@ check_koi8r_%: $(STEMMING_DATA)/% stemwords
@python -c 'print(open("$</output.txt").read().decode("utf8").encode("koi8_r"))' | \
diff -u - tmp.txt
@rm tmp.txt

# Run the Python stemmer tests: stage the python_check tree, then check the
# output of every libstemmer algorithm.
check_python: check_python_stemwords $(libstemmer_algorithms:%=check_python_%)

# Check one language's Python stemmer against the reference data.
# Runs stemwords.py inside python_check on $(STEMMING_DATA)/<lang>/voc.txt and
# diffs the result against output.txt from the same directory.  The "../$<"
# paths mean STEMMING_DATA must be given relative to the top-level directory.
# python_check is staged by check_python_stemwords; it is an order-only
# prerequisite so the staging happens first even under "make -j" or when a
# single check_python_<lang> goal is requested, while $< stays the data
# directory.  The scratch file is named per language ($* is the pattern stem)
# so that parallel checks do not overwrite each other's output.
check_python_%: $(STEMMING_DATA)/% | check_python_stemwords
	@echo "Checking output of $* stemmer with UTF-8"
	(cd python_check && \
	$(python) stemwords.py -c utf8 -l $* -i ../$</voc.txt -o tmp_$*.txt && \
	diff -u ../$</output.txt tmp_$*.txt && \
	rm tmp_$*.txt)

# Stage the tree used by the check_python_% tests: python_check/ receives
# stemwords.py, and python_check/snowballstemmer/ receives the runtime modules
# followed by the generated stemmers.  All copied files are declared as
# prerequisites.  A single "mkdir -p" on the nested directory creates both
# levels.
check_python_stemwords: $(PYTHON_STEMWORDS_SOURCE) $(PYTHON_RUNTIME_SOURCES) $(PYTHON_SOURCES)
	mkdir -p python_check/snowballstemmer && \
	cp -a $(PYTHON_RUNTIME_SOURCES) python_check/snowballstemmer && \
	cp -a $(PYTHON_SOURCES) python_check/snowballstemmer && \
	cp -a $(PYTHON_STEMWORDS_SOURCE) python_check/
22 changes: 22 additions & 0 deletions compiler/driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ static void print_arglist(void) {
" [-j[ava]]\n"
#endif
" [-c++]\n"
#ifndef DISABLE_PYTHON
" [-py[thon]]\n"
#endif
" [-w[idechars]]\n"
" [-u[tf8]]\n"
" [-n[ame] class name]\n"
Expand Down Expand Up @@ -105,6 +108,13 @@ static void read_options(struct options * o, int argc, char * argv[]) {
o->make_lang = LANG_CPLUSPLUS;
continue;
}
#ifndef DISABLE_PYTHON
if (eq(s, "-py") || eq(s, "-python")) {
o->make_lang = LANG_PYTHON;
o->widechars = true;
continue;
}
#endif
if (eq(s, "-w") || eq(s, "-widechars")) {
o->widechars = true;
o->utf8 = false;
Expand Down Expand Up @@ -237,6 +247,18 @@ extern int main(int argc, char * argv[]) {
close_generator_java(g);
fclose(o->output_java);
}
#endif
#ifndef DISABLE_PYTHON
if (o->make_lang == LANG_PYTHON) {
symbol * b = add_s_to_b(0, s);
b = add_s_to_b(b, ".py");
o->output_python = get_output(b);
lose_b(b);
g = create_generator_python(a, o);
generate_program_python(g);
close_generator_python(g);
fclose(o->output_python);
}
#endif
}
close_analyser(a);
Expand Down
Loading