Skip to content

Commit 42f8ebd

Browse files
authored
Use Julia packages instead of unix commands (#107)
* Use CURL_jll Tar_jll Gzip_jll instead of commands * julia1.8 required by CURL_jll * not Tar_jll for windows * allow CURL_jll 8 * play with dependencies * use ChannelBuffers for curl/gunzip/tar * cd out of dir to be removed * rename `metadata` to `getmeta` * using DataFrames restricted
1 parent 449d730 commit 42f8ebd

File tree

11 files changed

+87
-91
lines changed

11 files changed

+87
-91
lines changed

.github/workflows/CI.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@ jobs:
1515
fail-fast: false
1616
matrix:
1717
version:
18-
- '1.0'
18+
- '1.6'
1919
- '1'
2020
- 'nightly'
2121
os:
2222
- ubuntu-latest
23+
- macos-latest
24+
- windows-latest
2325
arch:
2426
- x64
2527
steps:

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ myMatrixDepot*
1313

1414
*.mem
1515
*.cov
16+
lcov.info
1617

18+
/docs/build
1719
# ignore generated files from Sphinx
1820
_build
1921
_static

Project.toml

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
name = "MatrixDepot"
22
uuid = "b51810bb-c9f3-55da-ae3c-350fc1fbce05"
3-
version = "1.0.8"
3+
version = "1.0.9"
44

55
[deps]
6-
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
6+
ChannelBuffers = "79a69506-cdd1-4876-b8e5-7af85e53af4f"
77
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
88
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
99
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
@@ -14,8 +14,8 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
1414
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1515

1616
[compat]
17-
CodecZlib = "0.6, 0.7"
18-
DataFrames = "0.20, 0.22, 0.21, 1.1, 1.2, 1.3"
17+
ChannelBuffers = "0.2, 0.3"
18+
DataFrames = "0.20, 0.22, 0.21, 1.1, 1.2, 1.3, 1.4, 1.5"
1919
MAT = "0.7, 0.8, 0.9, 0.10"
2020
Scratch = "1"
2121
julia = "1"

src/MatrixDepot.jl

+2-3
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ Access is like
3636
using MatrixDepot
3737
3838
A = matrixdepot("hilb", 10) # locally generated hilbert matrix dimensions (10,10)
39-
39+
4040
A = matrixdepot("HB/1138_bus") # named matrix of the SuiteSparse Collection
4141
A = matrixdepot(sp(1)) # same matrix using numerical id
42-
A = matrixdepot("Harwell*/*/1138_bus") # matrix from the Matrix Market Collection
42+
A = matrixdepot("Harwell*/*/1138_bus") # matrix from the Matrix Market Collection
4343
4444
md = mdopen("*/bfly") # named matrix with some extra data
4545
A = md.A
@@ -69,7 +69,6 @@ Access is like
6969
"""
7070
module MatrixDepot
7171
using LinearAlgebra, SparseArrays, Serialization
72-
using CodecZlib
7372
using Scratch
7473
import Base: show
7574

src/common.jl

+7-7
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ end
327327
function list!(db::MatrixDatabase, res::Vector{String}, p::Not)
328328
isempty(res) && return res
329329
cres = list!(db, copy(res), p.pattern)
330-
isempty(cres) && return res
330+
isempty(cres) && return res
331331
resall!(db, res)
332332
setdiff!(res, cres)
333333
end
@@ -348,7 +348,7 @@ end
348348

349349
list!(db::MatrixDatabase, res::Vector{String}, ::Tuple{}) = res
350350

351-
# logical AND
351+
# logical AND
352352
function list!(db::MatrixDatabase, res::Vector{String}, r::Tuple)
353353
isempty(res) && return res
354354
check_symbols(r)
@@ -419,7 +419,7 @@ mdatav(db::MatrixDatabase, p::Pattern) = verify_loaded(db, mdata(db, p))
419419
420420
Load data from remote repository for all problems matching pattern.
421421
422-
Return the number of successfully loaded matrices.
422+
Return the number of successfully loaded matrices.
423423
"""
424424
load(p::Pattern) = load(MATRIX_DB, p)
425425
load(db::MatrixDatabase, p::Pattern) = _load(db, loadmatrix, p)
@@ -486,13 +486,13 @@ function mdata(db::MatrixDatabase, p::Pattern)
486486
end
487487

488488
"""
489-
metadata([db, ], Union{MatrixDescriptor,MatrixData})
489+
getmeta([db, ], Union{MatrixDescriptor,MatrixData})
490490
491491
Return a copy of the list of metadata names.
492492
"""
493-
metadata(mdesc::MatrixDescriptor) = metadata(mdesc.data)
494-
metadata(data::RemoteMatrixData) = copy(data.metadata)
495-
metadata(data::MatrixData) = String[]
493+
getmeta(mdesc::MatrixDescriptor) = getmeta(mdesc.data)
494+
getmeta(data::RemoteMatrixData) = copy(data.metadata)
495+
getmeta(data::MatrixData) = String[]
496496

497497
_mdopen(data::RemoteMatrixData)= MatrixDescriptor(data)
498498
function _mdopen(data::GeneratedMatrixData, args...)

src/datareader.jl

+3-4
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ Example:
9999
"""
100100
metasymbols(md::MatrixDescriptor{<:RemoteMatrixData}) = metasymbols(md.data)
101101
function metasymbols(data::RemoteMatrixData)
102-
Symbol.(metastring.(data.name, metadata(data)))
102+
Symbol.(metastring.(data.name, getmeta(data)))
103103
end
104104
function metasymbols(md::MatrixDescriptor{<:GeneratedMatrixData})
105105
mdc = md.cache[]
@@ -118,7 +118,7 @@ end
118118
#internal helper to select special metadata (matrix, rhs, or solution)
119119
function metaname(data::RemoteMatrixData, exli::AbstractString...)
120120
base = rsplit(data.name, '/', limit=2)[end]
121-
meda = metadata(data)
121+
meda = getmeta(data)
122122
for ext in exli
123123
f = metastring_reverse(data, ext)
124124
if f in meda
@@ -165,12 +165,11 @@ function metastring_reverse(data::RemoteMatrixData, metaabbr::AbstractString)
165165
MTX = ".mtx"
166166
base = split(data.name, '/')[end]
167167
metaabbr == "A" && return string(base, MTX)
168-
mdata = metadata(data)
168+
mdata = getmeta(data)
169169
meta = string(base, '_', metaabbr)
170170
meta in mdata && return meta
171171
meta = string(meta, MTX)
172172
meta in mdata && return meta
173173
metaabbr
174174
end
175175
metastring_reverse(::MatrixData, metaabbr::AbstractString) = metaabbr
176-

src/download.jl

+37-55
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#####################################################
44

55
using Base.Filesystem
6+
using ChannelBuffers
67

78
# collect the keys from local database (MATRIXDICT or USERMATRIXDICT)
89
# provide a numerical id counting from 1 for either database.
@@ -106,28 +107,6 @@ function update(db::MatrixDatabase=MATRIX_DB)
106107
downloadindices(db, ignoredb=true)
107108
end
108109

109-
function gunzip(fname)
110-
endswith(fname, ".gz") || error("gunzip: $fname: unknown suffix")
111-
112-
destname = rsplit(fname, ".gz", limit=2)[1]
113-
BUFFSIZE = 1000000
114-
try
115-
open(destname, "w") do f
116-
open(GzipDecompressorStream, fname) do g
117-
buffer = read(g, BUFFSIZE)
118-
while length(buffer) > 0
119-
write(f, buffer)
120-
buffer = read(g, BUFFSIZE)
121-
end
122-
end
123-
end
124-
catch
125-
@warn "decompression error - file $destname set to empty."
126-
open(destname, "w") do f; end
127-
end
128-
destname
129-
end
130-
131110
"""
132111
loadmatrix(data::RemoteMatrixData)
133112
@@ -144,28 +123,26 @@ function loadmatrix(data::RemoteMatrixData)
144123
addmetadata!(data)
145124
return 0
146125
end
147-
dirfn = localfile(data)
148126
dir = dirname(localdir(data))
127+
dirt = string(dir, ".tmp")
128+
rm(dirt, force=true, recursive=true)
129+
mkpath(dirt)
149130
url = redirect(dataurl(data))
150-
tarfile = ""
151-
152131
isdir(dir) || mkpath(dir)
153132
wdir = pwd()
154133
try
155134
@info("downloading: $url")
156-
downloadfile(url, dirfn)
157-
tarfile = gunzip(dirfn)
158-
cd(dir)
159-
rfile = relpath(string(tarfile))
160-
if endswith(tarfile, ".tar")
161-
run(`tar -xf $rfile`)
162-
rm(tarfile; force=true)
135+
pipe = downloadpipeline(url, dirt)
136+
wait(run(pipe))
137+
cd(dirt)
138+
for file in readdir(dirt)
139+
mv(file, joinpath(dir, file), force=true)
163140
end
164-
catch
165-
141+
catch ex
142+
@warn("download of $url failed: $ex")
166143
finally
167144
cd(wdir)
168-
rm(dirfn, force=true)
145+
rm(dirt, force=true, recursive=true)
169146
end
170147
addmetadata!(data)
171148
1
@@ -187,20 +164,19 @@ function loadinfo(data::RemoteMatrixData)
187164
return 0
188165
end
189166
url = redirect(dataurl(data))
190-
pipe = downloadpipeline(url)
167+
io = ChannelPipe()
168+
pipe = downloadpipeline(url, io)
169+
tl = run(pipe)
191170
out = IOBuffer()
192171
s = try
193172
@info("downloading head of $url")
194-
open(pipe, "r") do io
195-
skip = 0
196-
while ( s = readline(io) ) != ""
197-
skip = s[1] == '%' || isempty(strip(s)) ? 0 : skip + 1
198-
skip <= 1 && println(out, s)
199-
if skip == 1 && length(split(s)) == 3
200-
break
201-
end
202-
end;
203-
close(io)
173+
skip = 0
174+
while ( s = readskip(io) ) != ""
175+
skip = s[1] == '%' || isempty(strip(s)) ? 0 : skip + 1
176+
skip <= 1 && println(out, s)
177+
if skip == 1 && length(split(s)) == 3
178+
break
179+
end
204180
end
205181
String(take!(out))
206182
catch ex
@@ -225,22 +201,29 @@ loadinfo(data::MatrixData) = 0
225201
226202
Set up a download pipeline (ChannelBuffers stages that download and expand the data)
227203
"""
228-
function downloadpipeline(url::AbstractString)
204+
function downloadpipeline(url::AbstractString, dir=nothing)
229205
urls = rsplit(url, '.', limit=3)
230-
cmd = []
231-
push!(cmd, downloadcommand(url))
206+
cmd = Any[ChannelBuffers.curl(url)]
232207
if urls[end] == "gz"
233-
push!(cmd, `gzip -dc`)
208+
push!(cmd, gunzip())
234209
resize!(urls, length(urls)-1)
210+
url = url[1:end-3]
235211
end
236-
if urls[end] == "tar"
237-
push!(cmd, `tar -xOf -`)
212+
if dir isa AbstractString
213+
if urls[end] == "tar"
214+
push!(cmd, tarx(dir))
215+
else
216+
file = rsplit(url, '/', limit=2)
217+
push!(cmd, joinpath(dir, file[end]))
218+
end
219+
elseif dir isa ChannelPipe
220+
push!(cmd, dir)
238221
end
239222
pipeline(cmd...)
240223
end
241224

242225
function downloadcommand(url::AbstractString, filename::AbstractString="-")
243-
`sh -c 'curl "'$url'" -Lso "'$filename'"'`
226+
curl(url) (filename == "-" ? stdout : filename)
244227
end
245228

246229
function data_warn(data::RemoteMatrixData, dn, i1, i2)
@@ -338,7 +321,6 @@ issvdok(::MatrixData) = false
338321
Copy file from remote or local url. Works around julia Downloads #69 and #36
339322
"""
340323
function downloadfile(url::AbstractString, out::AbstractString)
341-
run(downloadcommand(url, out))
324+
wait(run(downloadcommand(url, out)))
342325
nothing
343326
end
344-

src/downloadsp.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
using MAT
6-
using DataFrames
6+
using DataFrames: DataFrames, DataFrame, select!
77
using Base.Filesystem
88

99
#const SS_SITE = "https://sparse.tamu.edu"

src/matrixmarket.jl

+22-8
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ const HERMITIAN = "hermitian"
3232
const SKEW_SYMMETRIC = "skew-symmetric"
3333

3434
function mmread(file::IO)
35-
line = lowercase(readline(file))
35+
line = lowercase(readskip(file))
3636
tokens = split(line)
37-
if tokens[1] != MATRIXM
37+
if length(tokens) < 2 || tokens[1] != MATRIXM
3838
parserr(string("Matrixmarket: invalid header:", line))
3939
end
4040
line = readline(file)
@@ -241,7 +241,7 @@ function mmreadcomment(filename::AbstractString)
241241
open(filename,"r") do mmfile
242242
skip = 0
243243
while !eof(mmfile)
244-
s = readline(mmfile)
244+
s = readskip(mmfile)
245245
skip = isempty(strip(s)) || s[1] == '%' ? 0 : skip + 1
246246
skip <= 1 && println(io, s)
247247
if skip == 1
@@ -254,6 +254,21 @@ function mmreadcomment(filename::AbstractString)
254254
String(take!(io))
255255
end
256256

257+
"""
258+
readskip(io)
259+
260+
Read a line. If the line starts with a ustar header block, remove that block.
261+
This allows reading through a tar archive without extracting the files.
262+
"""
263+
function readskip(io::IO)
264+
max = 512
265+
line = readline(io)
266+
if length(line) >= max && line[258:263] == "ustar\0"
267+
line = line[max+1:end]
268+
end
269+
line
270+
end
271+
257272
"""
258273
mmreadheader(filename)
259274
Read header information from mtx file.
@@ -264,7 +279,7 @@ function mmreadheader(file::AbstractString)
264279
if stat(io).size == 0
265280
return nothing
266281
end
267-
line = lowercase(readline(io))
282+
line = lowercase(readskip(io))
268283
while true
269284
token = split(line)
270285
if length(token) >= 4 &&
@@ -275,8 +290,8 @@ function mmreadheader(file::AbstractString)
275290
hdr = Dict{Symbol,Any}()
276291
field = :none
277292
while startswith(line, '%') || isempty(strip(line))
278-
field = push_hdr!(hdr, line, field)
279-
line = readline(io)
293+
field = push_hdr!(hdr, line, field)
294+
line = readskip(io)
280295
end
281296
res = try parseint(line) catch; [] end
282297
if length(res) != (token[3] == COORD ? 3 : 2)
@@ -299,7 +314,7 @@ function mmreadheader(file::AbstractString)
299314
return hdr
300315
else
301316
while !eof(io) && !startswith(line, '%')
302-
line = readline(io)
317+
line = readskip(io)
303318
end
304319
if eof(io)
305320
return hdr
@@ -456,4 +471,3 @@ function parsenext(T::Type{<:Complex}, c, p)
456471
p, s = parsenext(R, c, p)
457472
p, r + s*im
458473
end
459-

src/types.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ function Base.show(io::IO, data::RemoteMatrixData)
211211
nnz = hd.nnz == hd.dnz ? "$(hd.nnz)" : "$(hd.nnz)/$(hd.dnz)"
212212
print(io, " $(hd.m)x$(hd.n)($nnz) ")
213213
print(io, data.date != 0 ? data.date : "")
214-
meta = join(metastring.(data.name, metadata(data)), ", ")
214+
meta = join(metastring.(data.name, getmeta(data)), ", ")
215215
n = length(meta)
216216
if n > 40
217217
meta = string(meta[1:17], " ... ", meta[end-17:end])

0 commit comments

Comments
 (0)