Skip to content

Commit 430e6f4

Browse files
committed
#401 support rdf file-based vocabulary + method to build hierarchy combining base SPARQL query and R
1 parent ac42dba commit 430e6f4

File tree

5 files changed

+120
-39
lines changed

5 files changed

+120
-39
lines changed

DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Imports:
4444
whisker,
4545
mime,
4646
digest,
47+
dplyr,
4748
plyr,
4849
readr,
4950
arrow,

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ import(R6)
109109
import(XML)
110110
import(arrow)
111111
import(dotenv)
112+
import(dplyr)
112113
import(geometa)
113114
import(geonapi)
114115
import(geosapi)

R/geoflow.R

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#' @import httr
2525
#' @import mime
2626
#' @import jsonlite
27+
#' @import dplyr
2728
#' @import yaml
2829
#' @import XML
2930
#' @import xml2

R/geoflow_vocabulary.R

+109-35
Original file line numberDiff line numberDiff line change
@@ -61,17 +61,42 @@ geoflow_vocabulary <- R6Class("geoflow_vocabulary",
6161
geoflow_skos_vocabulary <- R6Class("geoflow_skos_vocabulary",
6262
inherit = geoflow_vocabulary,
6363
public = list(
64+
#'@field rdf rdf
65+
rdf = NULL,
6466
#'@field endpoint endpoint
65-
endpoint = NA,
67+
endpoint = NULL,
6668

6769
#'@description Initializes a vocabulary
6870
#'@param id id
6971
#'@param def def
7072
#'@param uri uri
71-
#'@param endpoint endpoint
72-
initialize = function(id, def, uri, endpoint){
73+
#'@param endpoint A Sparql endpoint
74+
#'@param file a RDF file
75+
initialize = function(id, def, uri, endpoint = NULL, file = NULL){
7376
super$initialize(id, def, uri, software_type = "sparql")
7477
self$endpoint = endpoint
78+
79+
#case of RDF resource
80+
if(!is.null(file)){
81+
if(startsWith(file, "http")){
82+
download.file(url = file, destfile = file.path(tempdir(), basename(file)), mode = "wb")
83+
file = file.path(tempdir(), basename(file))
84+
}
85+
if(mime::guess_type(file) %in% c("application/gzip", "application/zip")){
86+
switch(mime::guess_type(file),
87+
"application/gzip" = {
88+
trg_file = file.path(tempdir(), paste0(id, ".rdf"))
89+
readr::write_lines(readLines(gzfile(file, "r"), warn = F), file = trg_file)
90+
self$rdf = rdflib::rdf_parse(trg_file)
91+
},
92+
"application/zip" = {
93+
trg_file = as.character(unzip(zipfile = file, list = T)[1])
94+
unzip(zipfile = file, exdir = tempdir())
95+
self$rdf = rdflib::rdf_parse(file.path(tempdir(), trg_file))
96+
}
97+
)
98+
}
99+
}
75100
},
76101

77102
#'@description query
@@ -80,20 +105,24 @@ geoflow_skos_vocabulary <- R6Class("geoflow_skos_vocabulary",
80105
#'@param mimetype mimetype
81106
#'@return the response of the SPARQL query
82107
query = function(str, graphUri = NULL, mimetype = "text/csv"){
83-
req_body = list(query = str)
84-
if(!is.null(graphUri)) req_body$graphUri = graphUri
85-
86-
req = httr::with_verbose(httr::POST(
87-
url = self$endpoint,
88-
encode = "form",
89-
body = req_body,
90-
httr::add_headers(
91-
"Content-Type" = "application/x-www-form-urlencoded",
92-
"User-Agent" = paste("geoflow", packageVersion("geoflow"), sep = "_"),
93-
"Accept" = mimetype
94-
)
95-
))
96-
httr::content(req)
108+
if(!is.null(self$endpoint)){
109+
req_body = list(query = str)
110+
if(!is.null(graphUri)) req_body$graphUri = graphUri
111+
112+
req = httr::with_verbose(httr::POST(
113+
url = self$endpoint,
114+
encode = "form",
115+
body = req_body,
116+
httr::add_headers(
117+
"Content-Type" = "application/x-www-form-urlencoded",
118+
"User-Agent" = paste("geoflow", packageVersion("geoflow"), sep = "_"),
119+
"Accept" = mimetype
120+
)
121+
))
122+
httr::content(req)
123+
}else if(!is.null(self$rdf)){
124+
rdflib::rdf_query(rdf = self$rdf, query = str, data.frame = T)
125+
}
97126
},
98127

99128
#'@description Ping query
@@ -141,32 +170,71 @@ geoflow_skos_vocabulary <- R6Class("geoflow_skos_vocabulary",
141170

142171
#'@description list_concepts
143172
#'@param lang lang
144-
#'@param mimetype mimetype
173+
#'@param method method used to build the hierarchy, either "SPARQL" or "R"
145174
#'@param out_format output format (tibble or list). Default is "tibble"
146175
#'@return the response of the SPARQL query
147-
get_concepts_hierarchy = function(lang = "en", mimetype = "text/csv",
176+
get_concepts_hierarchy = function(lang = "en",
177+
method = c("SPARQL","R"),
148178
out_format = c("tibble","list")){
179+
180+
method = match.arg(method)
149181
out_format = match.arg(out_format)
150-
str = paste0("
151-
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
152-
153-
SELECT ?broaderConcept ?broaderPrefLabel ?concept ?prefLabel WHERE {
154-
?concept a skos:Concept .
155-
OPTIONAL {
156-
?concept skos:prefLabel ?prefLabel .
157-
FILTER (LANG(?prefLabel) = \"",lang,"\")
158-
}
159-
OPTIONAL {
160-
?concept skos:broader ?broaderConcept .
182+
183+
out <-switch(method,
184+
"SPARQL" = {
185+
str = paste0("
186+
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
187+
188+
SELECT ?broaderConcept ?broaderPrefLabel ?concept ?prefLabel WHERE {
189+
?concept a skos:Concept .
161190
OPTIONAL {
162-
?broaderConcept skos:prefLabel ?broaderPrefLabel .
163-
FILTER (LANG(?broaderPrefLabel) = \"",lang,"\")
191+
?concept skos:prefLabel ?prefLabel .
192+
FILTER (LANG(?prefLabel) = \"",lang,"\")
193+
}
194+
OPTIONAL {
195+
?concept skos:broader ?broaderConcept .
196+
OPTIONAL {
197+
?broaderConcept skos:prefLabel ?broaderPrefLabel .
198+
FILTER (LANG(?broaderPrefLabel) = \"",lang,"\")
199+
}
164200
}
165201
}
202+
ORDER BY ?concept
203+
")
204+
self$query(str = str, mimetype = mimetype)
205+
},
206+
"R" = {
207+
filter_by_language <- function(df, language) {
208+
df[!is.na(df$lang),] %>%
209+
dplyr::filter(lang == language)
210+
}
211+
#perform base sparql result
212+
sparql_result = self$query(
213+
str = "SELECT ?s ?p ?o ?lang WHERE {
214+
?s ?p ?o .
215+
OPTIONAL {
216+
BIND(LANG(?o) AS ?lang)
217+
}
218+
}",
219+
mimetype = "text/csv"
220+
)
221+
# Create a hierarchy data.frame
222+
sparql_result %>%
223+
dplyr::filter(p == "http://www.w3.org/2004/02/skos/core#broader") %>%
224+
dplyr::rename(concept = s, broaderConcept = o) %>%
225+
dplyr::select(concept, broaderConcept) %>%
226+
dplyr::left_join(
227+
filter_by_language(sparql_result %>% filter(p == "http://www.w3.org/2004/02/skos/core#prefLabel"), lang) %>% rename(concept = s, prefLabel = o),
228+
by = "concept"
229+
) %>%
230+
dplyr::left_join(
231+
filter_by_language(sparql_result %>% filter(p == "http://www.w3.org/2004/02/skos/core#prefLabel"), lang) %>% rename(broaderConcept = s, broaderPrefLabel = o),
232+
by = "broaderConcept"
233+
) %>%
234+
dplyr::select(broaderConcept, broaderPrefLabel, concept, prefLabel)
166235
}
167-
ORDER BY ?concept
168-
")
169-
out = self$query(str = str, mimetype = mimetype)
236+
)
237+
170238
out[is.na(out$broaderPrefLabel),]$broaderPrefLabel = "root"
171239
if(out_format == "list"){
172240
relationships <- precompute_relationships(as.data.frame(out), "broaderPrefLabel", "prefLabel");
@@ -296,6 +364,12 @@ geoflow_skos_vocabulary <- R6Class("geoflow_skos_vocabulary",
296364
#'
297365
register_vocabularies = function(){
298366
vocabularies <- list(
367+
geoflow_skos_vocabulary$new(
368+
id = "gemet",
369+
def = "GEMET Thesaurus",
370+
uri = "https://www.eionet.europa.eu/gemet",
371+
file = "https://www.eionet.europa.eu/gemet/latest/gemet.rdf.gz"
372+
),
299373
geoflow_skos_vocabulary$new(
300374
id = "agrovoc",
301375
def = "AGROVOC Thesaurus",

man/geoflow_skos_vocabulary.Rd

+8-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)