Skip to content

Commit 5d4dc4f

Browse files
committed
AugmentGrammar: Add Strongs List 2 Index mode
This mode analyzes Strongs numbers that occur more than once in a verse and creates a list of source indices for each of them. If a Strongs number occurs the same number of times in the verse when augmenting, the stored indices are assigned to its occurrences in the same order.
1 parent 2f1feee commit 5d4dc4f

File tree

1 file changed

+77
-11
lines changed

1 file changed

+77
-11
lines changed

biblemulticonverter/src/main/java/biblemulticonverter/format/AugmentGrammar.java

+77-11
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
import java.io.File;
55
import java.io.FileInputStream;
66
import java.io.FileOutputStream;
7-
import java.io.FileWriter;
87
import java.io.IOException;
98
import java.io.OutputStreamWriter;
109
import java.nio.charset.StandardCharsets;
1110
import java.util.ArrayList;
1211
import java.util.Arrays;
12+
import java.util.HashMap;
1313
import java.util.List;
14+
import java.util.Map;
15+
import java.util.Map.Entry;
1416
import java.util.NoSuchElementException;
1517
import java.util.Properties;
1618

@@ -42,6 +44,7 @@ public class AugmentGrammar implements ExportFormat {
4244
" S: Read Strongs and augment Morphology",
4345
" I: Read Indices and augment Morphology",
4446
" SI: Read Strongs and augment Indices",
47+
" SLI: Read Strongs that exist more than once as list and augment Indices",
4548
" IS: Read Indices and augment Strongs",
4649
"",
4750
"Database can only be used to augment using a mode if the mode was also present when analyzing."
@@ -62,7 +65,7 @@ public void doExport(Bible bible, String... exportArgs) throws Exception {
6265
if (exportArgs[0].equals("dump")) {
6366
boolean humanStrongs = exportArgs.length > 2 && exportArgs[2].equals("humanStrongs");
6467
try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(exportArgs[1]), StandardCharsets.UTF_8))) {
65-
runOperation(bible, new GrammarOperation() {
68+
runOperation(bible, null, new GrammarOperation() {
6669

6770
private int counter = 0;
6871
private Reference lastReference = null;
@@ -91,7 +94,7 @@ public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<Runt
9194
} else if (exportArgs[0].equals("dumpwords")) {
9295
boolean humanStrongs = exportArgs.length > 2 && exportArgs[2].equals("humanStrongs");
9396
try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(exportArgs[1]), StandardCharsets.UTF_8))) {
94-
runOperation(bible, new GrammarOperation() {
97+
runOperation(bible, null, new GrammarOperation() {
9598

9699
private int counter = 0;
97100
private Reference lastReference = null;
@@ -130,7 +133,8 @@ public boolean visitEnd() {
130133
props.load(in);
131134
}
132135
}
133-
runOperation(bible, new GrammarOperation() {
136+
Map<String,List<String>> strongsLists = new HashMap<>();
137+
runOperation(bible, null, new GrammarOperation() {
134138

135139
private void analyze(Reference reference, char separator, String[] srcVals, String suffix, String[] dstVals) {
136140
if (srcVals != null && dstVals != null && srcVals.length == dstVals.length) {
@@ -164,16 +168,35 @@ public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<Runt
164168
analyze(reference, '@', idxStrings, "", rmac);
165169
break;
166170
case INDEX2STRONGS:
167-
analyze(reference, '*', fullStrongs, "@", idxStrings);
171+
analyze(reference, '@', idxStrings, "@", fullStrongs);
172+
break;
173+
case STRONGSLIST2INDEX:
174+
if (idxStrings != null && fullStrongs != null && idxStrings.length == fullStrongs.length) {
175+
for (int i = 0; i < idxStrings.length; i++) {
176+
String key = reference.getBook().getOsisID() + "." + reference.getChapter() + "." + reference.getVerse() + '*' + fullStrongs[i] + "@L";
177+
strongsLists.computeIfAbsent(key, x -> new ArrayList<>()).add(idxStrings[i]);
178+
}
179+
}
168180
break;
169181
case STRONGS2INDEX:
170-
analyze(reference, '@', idxStrings, "@", fullStrongs);
182+
analyze(reference, '*', fullStrongs, "@", idxStrings);
171183
break;
172184
}
173185
}
174186
return next.visitGrammarInformation(strongsPrefixes, strongs, rmac, sourceIndices);
175187
}
176188
});
189+
for (Entry<String, List<String>> entry : strongsLists.entrySet()) {
190+
if (entry.getValue().size() == 1)
191+
continue;
192+
String key = entry.getKey();
193+
String value = String.join(",", entry.getValue());
194+
String oldVal = props.getProperty(key);
195+
if (oldVal == null)
196+
props.setProperty(key, value);
197+
else if (!oldVal.equals(value))
198+
props.setProperty(key, "*");
199+
}
177200
try (FileOutputStream out = new FileOutputStream(exportArgs[1])) {
178201
props.store(out, "AugmentGrammar database");
179202
}
@@ -182,7 +205,28 @@ public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<Runt
182205
try (FileInputStream in = new FileInputStream(exportArgs[1])) {
183206
props.load(in);
184207
}
185-
runOperation(bible, new GrammarOperation() {
208+
final Map<String,int[]> strongsCounters = new HashMap<>();
209+
GrammarOperation prepare = null;
210+
if (modes.contains(Mode.STRONGSLIST2INDEX)) {
211+
prepare = new GrammarOperation() {
212+
@Override
213+
public void reset() {
214+
strongsCounters.clear();
215+
}
216+
217+
@Override
218+
public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<RuntimeException> next, char[] strongsPrefixes, int[] strongs, String[] rmac, int[] sourceIndices) {
219+
String[] fullStrongs = buildFullStrongs(reference, strongsPrefixes, strongs, false);
220+
if (fullStrongs != null) {
221+
for (String entry : fullStrongs) {
222+
strongsCounters.computeIfAbsent(entry, x -> new int[2])[0]++;
223+
}
224+
}
225+
return next.visitGrammarInformation(strongsPrefixes, strongs, rmac, sourceIndices);
226+
}
227+
};
228+
}
229+
runOperation(bible, prepare, new GrammarOperation() {
186230
@Override
187231
public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<RuntimeException> next, char[] strongsPrefixes, int[] strongs, String[] rmac, int[] sourceIndices) {
188232
String[] fullStrongs = buildFullStrongs(reference, strongsPrefixes, strongs, false);
@@ -229,12 +273,28 @@ public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<Runt
229273
}
230274
}
231275
break;
276+
case STRONGSLIST2INDEX:
277+
if (fullStrongs != null && sourceIndices == null) {
278+
sourceIndices = new int[fullStrongs.length];
279+
for (int i = 0; i < fullStrongs.length; i++) {
280+
String value = props.getProperty(keyPrefix + "*" + fullStrongs[i] + "@L", "*");
281+
String[] parts = value.equals("*") ? null : value.split(",");
282+
int[] cnt = strongsCounters.get(fullStrongs[i]);
283+
if (value.equals("*") || parts.length != cnt[0]) {
284+
sourceIndices = null;
285+
break;
286+
}
287+
sourceIndices[i] = Integer.parseInt(parts[cnt[1]]);
288+
cnt[1]++;
289+
}
290+
}
291+
break;
232292
case STRONGS2INDEX:
233293
if (fullStrongs != null && sourceIndices == null) {
234294
sourceIndices = new int[fullStrongs.length];
235295
for (int i = 0; i < fullStrongs.length; i++) {
236296
String value = props.getProperty(keyPrefix + "*" + fullStrongs[i] + "@", "*");
237-
if (rmac[i].equals("*")) {
297+
if (value.equals("*")) {
238298
sourceIndices = null;
239299
break;
240300
}
@@ -252,7 +312,7 @@ public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<Runt
252312
exportFormat.doExport(bible, Arrays.copyOfRange(exportArgs, 3, exportArgs.length));
253313

254314
} else if (exportArgs[0].equals("addsourceindex")) {
255-
runOperation(bible, new GrammarOperation() {
315+
runOperation(bible, null, new GrammarOperation() {
256316
private int counter = 0;
257317
private Reference lastReference = null;
258318

@@ -290,15 +350,20 @@ private static String[] buildFullStrongs(Versification.Reference reference, char
290350
return result;
291351
}
292352

293-
protected <T extends Throwable> void runOperation(Bible bible, GrammarOperation operation) throws T {
353+
protected <T extends Throwable> void runOperation(Bible bible, GrammarOperation prepare, GrammarOperation operation) throws T {
294354
for (Book book : bible.getBooks()) {
295355
int cnum = 0;
296356
for (Chapter chapter : book.getChapters()) {
297357
cnum++;
298358
List<Verse> verses = chapter.getVerses();
299359
for (int i = 0; i < verses.size(); i++) {
300360
Verse v1 = verses.get(i);
361+
if (prepare != null) {
362+
prepare.reset();
363+
v1.accept(new GrammarOperationVisitor(new FormattedText().getAppendVisitor(), new Versification.Reference(book.getId(), cnum, v1.getNumber()), prepare));
364+
}
301365
Verse v2 = new Verse(v1.getNumber());
366+
operation.reset();
302367
v1.accept(new GrammarOperationVisitor(v2.getAppendVisitor(), new Versification.Reference(book.getId(), cnum, v1.getNumber()), operation));
303368
v2.finished();
304369
verses.set(i, v2);
@@ -308,7 +373,7 @@ protected <T extends Throwable> void runOperation(Bible bible, GrammarOperation
308373
}
309374

310375
private static enum Mode {
311-
STRONGS2MORPH("S"), INDEX2MORPH("I"), STRONGS2INDEX("SI"), INDEX2STRONGS("IS");
376+
STRONGS2MORPH("S"), INDEX2MORPH("I"), STRONGS2INDEX("SI"), STRONGSLIST2INDEX("SLI"), INDEX2STRONGS("IS");
312377

313378
private final String code;
314379

@@ -326,6 +391,7 @@ private static Mode fromCode(String code) {
326391
}
327392

328393
private static interface GrammarOperation {
394+
public default void reset() {}
329395
public abstract Visitor<RuntimeException> handleGrammar(Versification.Reference reference, Visitor<RuntimeException> next, char[] strongsPrefixes, int[] strongs, String[] rmac, int[] sourceIndices);
330396
}
331397

0 commit comments

Comments
 (0)