Skip to content

Commit 3d7fd5c

Browse files
committed
Added support for the 'StringLengthTransformer' transformation type
1 parent 38f5c45 commit 3d7fd5c

File tree

7 files changed

+797
-742
lines changed

7 files changed

+797
-742
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ Java library and command-line application for converting [Scikit-Learn](https://
444444
* `sklearn2pmml.preprocessing.SecondsSinceYearTransformer`
445445
* `sklearn2pmml.preprocessing.SelectFirstTransformer`
446446
* `sklearn2pmml.preprocessing.SeriesConstructor`
447+
* `sklearn2pmml.preprocessing.StringLengthTransformer`
447448
* `sklearn2pmml.preprocessing.StringNormalizer`
448449
* `sklearn2pmml.preprocessing.SubstringTransformer`
449450
* `sklearn2pmml.preprocessing.WordCountTransformer`
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) 2025 Villu Ruusmann
3+
*
4+
* This file is part of JPMML-SkLearn
5+
*
6+
* JPMML-SkLearn is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU Affero General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* JPMML-SkLearn is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU Affero General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Affero General Public License
17+
* along with JPMML-SkLearn. If not, see <http://www.gnu.org/licenses/>.
18+
*/
19+
package sklearn2pmml.preprocessing;
20+
21+
import java.util.Collections;
22+
import java.util.List;
23+
24+
import org.dmg.pmml.Apply;
25+
import org.dmg.pmml.DataType;
26+
import org.dmg.pmml.DerivedField;
27+
import org.dmg.pmml.OpType;
28+
import org.dmg.pmml.PMMLFunctions;
29+
import org.jpmml.converter.ContinuousFeature;
30+
import org.jpmml.converter.ExpressionUtil;
31+
import org.jpmml.converter.Feature;
32+
import org.jpmml.python.ClassDictUtil;
33+
import org.jpmml.sklearn.SkLearnEncoder;
34+
35+
public class StringLengthTransformer extends StringTransformer {
36+
37+
public StringLengthTransformer(String module, String name){
38+
super(module, name);
39+
}
40+
41+
@Override
42+
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
43+
ClassDictUtil.checkSize(1, features);
44+
45+
Feature feature = features.get(0);
46+
47+
Apply apply = ExpressionUtil.createApply(PMMLFunctions.STRINGLENGTH, feature.ref());
48+
49+
DerivedField derivedField = encoder.createDerivedField(createFieldName("length", feature), OpType.CONTINUOUS, DataType.INTEGER, apply);
50+
51+
return Collections.singletonList(new ContinuousFeature(encoder, derivedField));
52+
}
53+
}

pmml-sklearn/src/main/resources/META-INF/sklearn2pmml.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ sklearn2pmml.preprocessing.SecondsSinceMidnightTransformer =
219219
sklearn2pmml.preprocessing.SecondsSinceYearTransformer =
220220
sklearn2pmml.preprocessing.SelectFirstTransformer =
221221
sklearn2pmml.preprocessing.SeriesConstructor =
222+
sklearn2pmml.preprocessing.StringLengthTransformer =
222223
sklearn2pmml.preprocessing.StringNormalizer =
223224
sklearn2pmml.preprocessing.SubstringTransformer =
224225
sklearn2pmml.preprocessing.WordCountTransformer =

0 commit comments

Comments
 (0)