Skip to content

Commit e21d567

Browse files
committed
First commit
0 parents  commit e21d567

File tree

8 files changed

+844
-0
lines changed

8 files changed

+844
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.DS_Store
2+
snipplets

DEPENDENCY

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{MongoDB}
2+
{MongoDB C++ driver}
3+
Take a look at makefile (currently not configured at all)
4+
make
5+
Test with small .osm file

README

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
osmo: Populate a MongoDB collection with OpenStreetMap data
2+
3+
4+
Features
5+
6+
- osmo populates a MongoDB database collection with all nodes, ways and relations
7+
- Currently, only OSM XML (.osm) is supported
8+
- It's just slightly slower than Osmosis (with Postgres)
9+
- Prerequisites: see DEPENDENCY
10+
- Building:
11+
12+
make
13+
14+
(A priority is to make this work on both Mac OS and Linux!)
15+
16+
- Synopsis:
17+
18+
bin/osmo DATAFILE DATABASE.COLLECTION
19+
20+
- To add an index on the primary key:
21+
22+
mongo DATABASE --eval \
23+
"printjson(db.COLLECTION.ensureIndex({ p: 1, id: 1 }, { unique: true, dropDups: true, background: true }))"
24+
25+
26+
Data model
27+
28+
The data model supported by osmo is focussed on minimal data size and unification. Some principles:
29+
30+
- nodes, ways and relations are transformed to GeoObject instances to unify the concept and to simplify queries on only 1 collection
31+
- Every geoObject has a key named "p" (prefix). It can have the values, "n" (node), "w" (way) or "r" (relation).
32+
- keys for regular fields (user, timestamp, ...) are abbreviated to save space
33+
- The geoObject primary key is [ 'p', 'id' ]
34+
- Example for a GeoObject instance:
35+
36+
{
37+
"_id" : ObjectId("4e3213dbe9b522a655e3f5ff"),
38+
"c" : 4977984,
39+
"id" : 318214,
40+
"p" : "r",
41+
"parts" : {
42+
"list" : [
43+
{
44+
"p" : "n",
45+
"id" : 319621296
46+
},
47+
{
48+
"p" : "n",
49+
"id" : 560090559
50+
},
51+
{
52+
"p" : "n",
53+
"id" : 292635574
54+
},
55+
{
56+
"p" : "n",
57+
"id" : 292635576
58+
},
59+
{
60+
"p" : "n",
61+
"id" : 245431191
62+
},
63+
{
64+
"p" : "w",
65+
"id" : 48289915
66+
},
67+
{
68+
"p" : "w",
69+
"id" : 49366289
70+
},
71+
...
72+
],
73+
"info" : [
74+
{
75+
"role" : "forward"
76+
},
77+
{
78+
"role" : "forward"
79+
},
80+
{
81+
"role" : ""
82+
},
83+
{
84+
"role" : "forward"
85+
},
86+
{
87+
"role" : ""
88+
},
89+
{
90+
"role" : "forward"
91+
},
92+
{
93+
"role" : "forward"
94+
},
95+
...
96+
]
97+
},
98+
"t" : 1276427410,
99+
"tags" : {
100+
"network" : "VRT",
101+
"operator" : "SWT",
102+
"ref" : "4",
103+
"route" : "bus",
104+
"type" : "route"
105+
},
106+
"u" : {
107+
"id" : 109925,
108+
"name" : "WanMil"
109+
},
110+
"v" : 6
111+
}
112+
113+
- "parts" is an ObjectView object that has always two properties: "list" (a list of GeoObject references) and "info" (that optionally holds information about the referenced objects in the same order). GeoObjects with p=n do not have parts, with p=w, parts are always references to nodes, with p=r parts can reference any type (node, way, relation).
114+
115+
116+
Contribute
117+
118+
Take a look at TODO to see a list of prioritized features that are still lacking.

TODO

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Next steps
2+
3+
Add option parsing
4+
connection data
5+
.osc support
6+
Validate data
7+
Assert that necessary attributes for an element are set
8+
...
9+
Debug & unify makefile (must work at least in Linux, MacOS)
10+
configure?
11+
I need help here
12+
13+
14+
Possible features
15+
16+
Support .pbf

makefile

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
CC = g++

# Location of the MongoDB C++ driver headers. Override on the command line,
# e.g. `make MONGO_INCLUDE=/usr/local/include/mongo`.
MONGO_INCLUDE ?= /Users/andi/.libraries/mongo

CFLAGS += -Wall -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -O3 -I$(MONGO_INCLUDE)

# Libraries are kept out of CFLAGS so they can be placed after the source file
# on the link line; linkers that resolve symbols left to right need that order.
LDLIBS += -lmongoclient -lboost_thread -lboost_filesystem -lboost_program_options -lboost_system -lpthread -lexpat

# Rebuild whenever the main source or any header under src/ changes.
bin/osmo: src/osmo.cc $(wildcard src/*.h)
	mkdir -p bin
	$(CC) $(CFLAGS) src/osmo.cc -o bin/osmo $(LDLIBS)

.PHONY : clean
clean:
	rm -f bin/*

src/mongodb.h

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#include <fcntl.h>
#include <unistd.h>

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <expat.h>
#include "client/dbclient.h"
8+
9+
using namespace mongo;
10+
using namespace osmo;
11+
12+
13+
14+
15+
namespace osmo {
16+
17+
namespace mongodb {
18+
19+
typedef struct {
20+
DBClientConnection* c;
21+
const char* collection;
22+
GeoObject geoObject;
23+
} context;
24+
25+
const int BUFFER_SIZE = 10240 * 1;
26+
27+
// Re-initialises `geoObject` for a new top-level element: calls reset(),
// stores the element name under the key "p", then copies every XML attribute.
// `attrs` is read as consecutive name/value pairs ending at a null name.
void init_object (GeoObject* geoObject, const XML_Char* element, const XML_Char** attrs) {

    geoObject->reset();
    geoObject->set_attr("p", element);

    for (const XML_Char** pair = attrs; *pair; pair += 2) {
        geoObject->set_attr(pair[0], pair[1]);
    }
}
36+
37+
// Expat start-tag handler. `data` is the `context` registered as user data;
// `attrs` is read as name/value pairs ending at a null name.
//   node / way / relation : start a fresh object via init_object()
//   nd                    : add a reference with p="n" to the current parts
//   tag                   : add a Tag to the current object
//   member                : add a reference plus its GeoObjectRel to the parts
// Any other element is ignored.
void XMLCALL startElement(void* data, const XML_Char* element, const XML_Char** attrs) {

    context* const state = static_cast<context*>(data);
    GeoObject* const current = &state->geoObject;

    // The comparisons stay ordered by how frequently each tag occurs in a planet file.
    if (strcmp(element, "nd") == 0) {
        GeoObjectRef nodeRef;
        nodeRef.set_attr("p", "n");
        for (const XML_Char** pair = attrs; *pair; pair += 2) {
            nodeRef.set_attr(pair[0], pair[1]);
        }
        current->parts->add(nodeRef);
        return;
    }

    if (strcmp(element, "node") == 0) {
        init_object(current, element, attrs);
        return;
    }

    if (strcmp(element, "tag") == 0) {
        Tag tag;
        for (const XML_Char** pair = attrs; *pair; pair += 2) {
            tag.set_attr(pair[0], pair[1]);
        }
        current->add_tag(tag);
        return;
    }

    if (strcmp(element, "way") == 0) {
        init_object(current, element, attrs);
        return;
    }

    if (strcmp(element, "member") == 0) {
        GeoObjectRef memberRef;
        GeoObjectRel memberRel;
        // Both objects are offered every attribute.
        for (const XML_Char** pair = attrs; *pair; pair += 2) {
            memberRef.set_attr(pair[0], pair[1]);
            memberRel.set_attr(pair[0], pair[1]);
        }
        current->parts->add(memberRef, memberRel);
        return;
    }

    if (strcmp(element, "relation") == 0) {
        init_object(current, element, attrs);
    }
}
86+
87+
// Expat end-tag handler. When a node, way or relation closes, the object
// accumulated in the context is inserted into the configured collection.
// All other closing tags are ignored.
void XMLCALL endElement(void *data, const XML_Char* element) {

    const bool closesObject = strcmp(element, "node") == 0
        || strcmp(element, "way") == 0
        || strcmp(element, "relation") == 0;
    if (!closesObject) {
        return;
    }

    context* const state = static_cast<context*>(data);
    state->c->insert(state->collection, state->geoObject.getBson());
    // NOTE(review): the outcome of insert() is not inspected here.
    //std::cout << state->c->getLastErrorDetailed().jsonString() << " ";
}
97+
98+
void parse (int fd, DBClientConnection* c, const char* collection) {
99+
100+
bool done;
101+
register context ct = {
102+
c,
103+
collection,
104+
GeoObject()
105+
};
106+
107+
//c->dropCollection(collection);
108+
109+
XML_Parser parser = XML_ParserCreate(0);
110+
if (!parser) {
111+
throw std::runtime_error("Error creating parser");
112+
}
113+
114+
XML_SetUserData(parser, (void*)&ct);
115+
116+
XML_SetElementHandler(parser, startElement, endElement);
117+
118+
do {
119+
void *buffer = XML_GetBuffer(parser, BUFFER_SIZE);
120+
if (buffer == 0) {
121+
throw std::runtime_error("Out of memory");
122+
}
123+
124+
int result = read(fd, buffer, BUFFER_SIZE);
125+
if (result < 0) {
126+
exit(1);
127+
}
128+
done = (result == 0);
129+
if (XML_ParseBuffer(parser, result, done) == XML_STATUS_ERROR)
130+
{
131+
XML_Error errorCode = XML_GetErrorCode(parser);
132+
long errorLine = XML_GetCurrentLineNumber(parser);
133+
long errorCol = XML_GetCurrentColumnNumber(parser);
134+
const XML_LChar *errorString = XML_ErrorString(errorCode);
135+
136+
std::stringstream errorDesc;
137+
errorDesc << "XML parsing error at line " << errorLine << ":" << errorCol;
138+
errorDesc << ": " << errorString;
139+
throw std::runtime_error(errorDesc.str());
140+
}
141+
} while (!done);
142+
143+
XML_ParserFree(parser);
144+
//error = "";
145+
}
146+
147+
148+
void osm2db (const char* file, const char* collection) {
149+
150+
int fd = open(file, O_RDONLY);
151+
if (fd < 0) {
152+
153+
std::cerr << "Can't open data file" << std::endl;
154+
exit(1);
155+
}
156+
157+
DBClientConnection* c = new DBClientConnection;
158+
try {
159+
160+
c->connect("localhost");
161+
} catch (DBException &e) {
162+
163+
std::cerr << "Cannot connect: " << e.what() << std::endl;
164+
exit(1);
165+
}
166+
167+
168+
char *suffix = strrchr(file, '.');
169+
if (!strcmp(suffix, ".osm")) {
170+
171+
parse(fd, c, collection);
172+
}
173+
/*else if (!strcmp(suffix, ".osc")) {
174+
175+
}
176+
else if (!strcmp(suffix, ".pbf")) {
177+
178+
}*/
179+
else {
180+
181+
std::cerr << "Unknown file suffix: " << suffix << std::endl;
182+
exit(1);
183+
}
184+
185+
186+
close(fd);
187+
}
188+
}
189+
}

src/osmo.cc

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <iostream>
2+
/*#include <cmath>*/
3+
#include "osmo.h"
4+
5+
6+
7+
8+
int main (int c, char *argv[]) {
9+
10+
if (c >= 3) {
11+
12+
osmo::mongodb::osm2db(argv[1], argv[2]);
13+
}
14+
15+
/*
16+
GeoObject* g = new GeoObject;
17+
g->p = GeoObject::P_node;
18+
g->id = 1000;
19+
Tag* t = new Tag;
20+
21+
memccpy(t->k, "amenity", 0, 255);
22+
memccpy(t->v, "cafe", 0, 255);
23+
g->add_tag(t);
24+
g->add_tag(t);
25+
26+
g->parts->add(g);
27+
28+
std::cout << g->parts->refs[0]->id;
29+
*/
30+
31+
std::cout << "ready";
32+
33+
return 0;
34+
}

0 commit comments

Comments
 (0)