Skip to content

Commit 42e0c39

Browse files
authored
Merge pull request #850 from lonvia/export-by-country
Add caching of address information for Nominatim export
2 parents 27cc937 + 23c18ef commit 42e0c39

22 files changed

+849
-506
lines changed

app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package de.komoot.photon.opensearch;
22

33
import de.komoot.photon.searcher.PhotonResult;
4-
import jakarta.json.JsonArray;
54
import org.json.JSONObject;
65

76
import java.util.Map;

src/main/java/de/komoot/photon/App.java

Lines changed: 87 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
import com.beust.jcommander.JCommander;
44
import com.beust.jcommander.ParameterException;
5-
import de.komoot.photon.nominatim.NominatimConnector;
5+
import de.komoot.photon.nominatim.ImportThread;
6+
import de.komoot.photon.nominatim.NominatimImporter;
67
import de.komoot.photon.nominatim.NominatimUpdater;
78
import de.komoot.photon.searcher.ReverseHandler;
89
import de.komoot.photon.searcher.SearchHandler;
@@ -14,7 +15,8 @@
1415

1516
import java.io.FileNotFoundException;
1617
import java.io.IOException;
17-
import java.util.Date;
18+
import java.util.*;
19+
import java.util.concurrent.ConcurrentLinkedQueue;
1820

1921
import static spark.Spark.*;
2022

@@ -107,9 +109,8 @@ private static void startJsonDump(CommandLineArgs args) {
107109
try {
108110
final String filename = args.getJsonDump();
109111
final JsonDumper jsonDumper = new JsonDumper(filename, args.getLanguages(), args.getExtraTags());
110-
NominatimConnector nominatimConnector = new NominatimConnector(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword());
111-
nominatimConnector.setImporter(jsonDumper);
112-
nominatimConnector.readEntireDatabase(args.getCountryCodes());
112+
113+
importFromDatabase(args, jsonDumper);
113114
LOGGER.info("Json dump was created: {}", filename);
114115
} catch (FileNotFoundException e) {
115116
throw new UsageException("Cannot create dump: " + e.getMessage());
@@ -121,22 +122,95 @@ private static void startJsonDump(CommandLineArgs args) {
121122
* Read all data from a Nominatim database and import it into a Photon database.
122123
*/
123124
private static void startNominatimImport(CommandLineArgs args, Server esServer) {
124-
DatabaseProperties dbProperties;
125-
NominatimConnector nominatimConnector = new NominatimConnector(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword());
126-
Date importDate = nominatimConnector.getLastImportDate();
125+
final var languages = initDatabase(args, esServer);
126+
127+
LOGGER.info("Starting import from nominatim to photon with languages: {}", String.join(",", languages));
128+
importFromDatabase(args, esServer.createImporter(languages, args.getExtraTags()));
129+
130+
LOGGER.info("Imported data from nominatim to photon with languages: {}", String.join(",", languages));
131+
}
132+
133+
private static String[] initDatabase(CommandLineArgs args, Server esServer) {
134+
final var nominatimConnector = new NominatimImporter(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword());
135+
final Date importDate = nominatimConnector.getLastImportDate();
136+
127137
try {
128-
dbProperties = esServer.recreateIndex(args.getLanguages(), importDate, args.getSupportStructuredQueries()); // clear out previous data
138+
// Clear out previous data.
139+
var dbProperties = esServer.recreateIndex(args.getLanguages(), importDate, args.getSupportStructuredQueries());
140+
return dbProperties.getLanguages();
129141
} catch (IOException e) {
130142
throw new UsageException("Cannot setup index, elastic search config files not readable");
131143
}
144+
}
145+
146+
private static void importFromDatabase(CommandLineArgs args, Importer importer) {
147+
final var connector = new NominatimImporter(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword());
148+
connector.prepareDatabase();
149+
connector.loadCountryNames();
132150

133-
LOGGER.info("Starting import from nominatim to photon with languages: {}", String.join(",", dbProperties.getLanguages()));
134-
nominatimConnector.setImporter(esServer.createImporter(dbProperties.getLanguages(), args.getExtraTags()));
135-
nominatimConnector.readEntireDatabase(args.getCountryCodes());
151+
String[] countries = args.getCountryCodes();
152+
153+
if (countries == null || countries.length == 0) {
154+
countries = connector.getCountriesFromDatabase();
155+
} else {
156+
countries = Arrays.stream(countries).map(String::trim).filter(s -> !s.isBlank()).toArray(String[]::new);
157+
}
158+
159+
final int numThreads = args.getThreads();
160+
ImportThread importThread = new ImportThread(importer);
161+
162+
try {
163+
164+
if (numThreads == 1) {
165+
for (var country : countries) {
166+
connector.readCountry(country, importThread);
167+
}
168+
} else {
169+
final Queue<String> todolist = new ConcurrentLinkedQueue<>(List.of(countries));
170+
171+
final List<Thread> readerThreads = new ArrayList<>(numThreads);
172+
173+
for (int i = 0; i < numThreads; ++i) {
174+
final NominatimImporter threadConnector;
175+
if (i > 0) {
176+
threadConnector = new NominatimImporter(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword());
177+
threadConnector.loadCountryNames();
178+
} else {
179+
threadConnector = connector;
180+
}
181+
final int threadno = i;
182+
Runnable runner = () -> {
183+
String nextCc = todolist.poll();
184+
while (nextCc != null) {
185+
LOGGER.info("Thread {}: reading country '{}'", threadno, nextCc);
186+
threadConnector.readCountry(nextCc, importThread);
187+
nextCc = todolist.poll();
188+
}
189+
};
190+
Thread thread = new Thread(runner);
191+
thread.start();
192+
readerThreads.add(thread);
193+
}
194+
readerThreads.forEach(t -> {
195+
while (true) {
196+
try {
197+
t.join();
198+
break;
199+
} catch (InterruptedException e) {
200+
LOGGER.warn("Thread interrupted:", e);
201+
// Restore interrupted state.
202+
Thread.currentThread().interrupt();
203+
}
204+
}
205+
});
206+
}
207+
} finally {
208+
importThread.finish();
209+
}
136210

137-
LOGGER.info("Imported data from nominatim to photon with languages: {}", String.join(",", dbProperties.getLanguages()));
138211
}
139212

213+
140214
private static void startNominatimUpdateInit(CommandLineArgs args) {
141215
NominatimUpdater nominatimUpdater = new NominatimUpdater(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword());
142216
nominatimUpdater.initUpdates(args.getNominatimUpdateInit());

src/main/java/de/komoot/photon/CommandLineArgs.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
@Parameters(parametersValidators = CorsMutuallyExclusiveValidator.class)
1515
public class CommandLineArgs {
1616

17+
@Parameter(names = "-j", description = "Number of threads to use for import.")
18+
private int threads = 1;
19+
1720
@Parameter(names = "-structured", description = "Enable support for structured queries.")
1821
private boolean supportStructuredQueries = false;
1922

@@ -107,6 +110,10 @@ public String[] getLanguages() {
107110
return getLanguages(true);
108111
}
109112

113+
public int getThreads() {
114+
return Integer.min(10, Integer.max(0, threads));
115+
}
116+
110117
public String getCluster() {
111118
return this.cluster;
112119
}

src/main/java/de/komoot/photon/PhotonDoc.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package de.komoot.photon;
22

3+
import de.komoot.photon.nominatim.model.AddressRow;
34
import org.locationtech.jts.geom.Envelope;
45
import org.locationtech.jts.geom.Geometry;
56
import org.locationtech.jts.geom.Point;
@@ -217,17 +218,27 @@ public boolean isUsefulForIndex() {
217218
private void extractAddress(Map<String, String> address, AddressType addressType, String addressFieldName) {
218219
String field = address.get(addressFieldName);
219220

220-
if (field != null) {
221-
Map<String, String> map = addressParts.computeIfAbsent(addressType, k -> new HashMap<>());
221+
if (field == null) {
222+
return;
223+
}
222224

225+
Map<String, String> map = addressParts.get(addressType);
226+
if (map == null) {
227+
map = new HashMap<>();
228+
map.put("name", field);
229+
addressParts.put(addressType, map);
230+
} else {
223231
String existingName = map.get("name");
224232
if (!field.equals(existingName)) {
233+
// Make a copy of the original name map because the map is reused for other addresses.
234+
map = new HashMap<>(map);
225235
LOGGER.debug("Replacing {} name '{}' with '{}' for osmId #{}", addressFieldName, existingName, field, osmId);
226236
// we keep the former name in the context as it might be helpful when looking up typos
227237
if (!Objects.isNull(existingName)) {
228238
context.add(Collections.singletonMap("formerName", existingName));
229239
}
230240
map.put("name", field);
241+
addressParts.put(addressType, map);
231242
}
232243
}
233244
}
@@ -241,6 +252,23 @@ public boolean setAddressPartIfNew(AddressType addressType, Map<String, String>
241252
return addressParts.computeIfAbsent(addressType, k -> names) == names;
242253
}
243254

255+
/**
256+
* Complete address data from a list of address rows.
257+
*/
258+
public void completePlace(List<AddressRow> addresses) {
259+
final AddressType doctype = getAddressType();
260+
for (AddressRow address : addresses) {
261+
final AddressType atype = address.getAddressType();
262+
263+
if (atype != null
264+
&& (atype == doctype || !setAddressPartIfNew(atype, address.getName()))
265+
&& address.isUsefulForContext()) {
266+
// no specifically handled item, check if useful for context
267+
getContext().add(address.getName());
268+
}
269+
}
270+
}
271+
244272
public void setCountry(Map<String, String> names) {
245273
addressParts.put(AddressType.COUNTRY, names);
246274
}

src/main/java/de/komoot/photon/nominatim/DBDataAdapter.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,9 @@ public interface DBDataAdapter {
3030
* Wrap a DELETE statement with a RETURNING clause.
3131
*/
3232
String deleteReturning(String deleteSQL, String columns);
33+
34+
/**
35+
* Wrap function to create a json array from a SELECT.
36+
*/
37+
String jsonArrayFromSelect(String valueSQL, String fromSQL);
3338
}

src/main/java/de/komoot/photon/nominatim/ImportThread.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
1111
/**
1212
* Worker thread for bulk importing data from a Nominatim database.
1313
*/
14-
class ImportThread {
14+
public class ImportThread {
1515
private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(ImportThread.class);
1616

1717
private static final int PROGRESS_INTERVAL = 50000;
18-
private static final NominatimResult FINAL_DOCUMENT = new NominatimResult(new PhotonDoc(0, null, 0, null, null));
19-
private final BlockingQueue<NominatimResult> documents = new LinkedBlockingDeque<>(20);
18+
private static final NominatimResult FINAL_DOCUMENT = NominatimResult.fromAddress(new PhotonDoc(0, null, 0, null, null), null);
19+
private final BlockingQueue<NominatimResult> documents = new LinkedBlockingDeque<>(100);
2020
private final AtomicLong counter = new AtomicLong();
2121
private final Importer importer;
2222
private final Thread thread;
@@ -70,7 +70,8 @@ public void finish() {
7070
Thread.currentThread().interrupt();
7171
}
7272
}
73-
LOGGER.info("Finished import of {} photon documents.", counter.longValue());
73+
LOGGER.info("Finished import of {} photon documents. (Total processing time: {}s)",
74+
counter.longValue(), (System.currentTimeMillis() - startMillis)/1000);
7475
}
7576

7677
private class ImportRunnable implements Runnable {

0 commit comments

Comments
 (0)