6
6
[parkour.util :as util :refer [doto-let]])
7
7
(:import [java.io Writer]
8
8
[java.net URI]
9
- [clojure.lang IDeref IPending]
9
+ [clojure.lang IDeref IObj IPending]
10
+ [org.apache.hadoop.fs Path]
10
11
[org.apache.hadoop.filecache DistributedCache]))
11
12
12
- ; ; Distributed value
13
- (deftype DVal [value readv params dcm]
14
- Object
15
- (toString [_]
16
- (let [v (if (realized? value) @value :pending )]
17
- (str " #<DVal: " (pr-str v) " >" )))
18
- IDeref (deref [_] @value)
19
- IPending (isRealized [_] (realized? value)))
13
+ (defn ^:private cache-name
14
+ " Generate distinct distcache-entry name for `source`."
15
+ [source] (str (gensym " dval-" ) " -" (-> source fs/path .getName)))
16
+
17
+ (defn ^:private dcpath*
18
+ [dcname path]
19
+ (let [path (str (fs/path path))
20
+ dcpath* (fn dcpath* [md]
21
+ (proxy [Path IObj] [path]
22
+ (meta [] md)
23
+ (withMeta [md] (dcpath* md path))))]
24
+ (dcpath* {:type ::dcpath , ::dcname dcname})))
25
+
26
+ (defn dcpath
27
+ " Distributed-cache–able instance of `path`. When serialized into a job
28
+ configuration, adds `path` to the distributed cache. When deserialized from a
29
+ job configuration, reconstitutes as the cache path when available and as the
30
+ original remote path when not (i.e. under local- or mixed-mode job execution)."
31
+ [path]
32
+ (let [path (fs/path path)
33
+ dcname (cache-name path)]
34
+ (dcpath* dcname path)))
35
+
36
+ (defmethod print-method ::dcpath
37
+ [path ^Writer writer]
38
+ (if (nil? cser/*conf*)
39
+ (print-method (with-meta path nil ) writer)
40
+ (let [^String dcname (-> path meta ::dcname )]
41
+ (fs/distcache! cser/*conf* {dcname path})
42
+ (.write writer " #parkour/dcpath \" " )
43
+ (.write writer dcname)
44
+ (.write writer " \" " ))))
20
45
21
46
(defn ^:private unfragment
22
47
" The provided `uri`, but without any fragment."
28
53
(let [uri-s (str uri), n (- (count uri-s) (count fragment) 1 )]
29
54
(fs/uri (subs uri-s 0 n))))))
30
55
31
- (defn ^:private ->entry
32
- " Parse `remote` and `local` into mapping tuple of (fragment, cache path)."
33
- [^URI remote local]
56
+ (defn ^:private resolve-source
57
+ " Resolve distributed cache `remote` and `local` URIs to the \" most local\"
58
+ available source path. Result will usually be a local file path, but may be the
59
+ original remote path under mixed-mode job execution."
60
+ [[^URI remote local]]
34
61
(if-let [fragment (.getFragment remote)]
35
62
(let [symlink (io/file fragment), symlink? (.exists symlink)
36
63
local (io/file (str local)), local? (.exists local)
37
64
remote (unfragment remote), remote? (= " file" (.getScheme remote))
38
65
source (cond symlink? symlink, local? local, remote? remote
66
+ ; ; Could localize, but issues: clean-up, directories
39
67
(mr/local-runner? cser/*conf*) remote
40
68
:else (throw (ex-info
41
69
(str remote " : cannot locate local file" )
42
70
{:remote remote, :local local})))]
43
- [fragment (fs/path source)] )))
71
+ (fs/path source))))
44
72
45
- (defn ^:private distcache-dval
46
- " Remote-side dval data reader, reconstituting as a delay ."
47
- [[readv params cnames] ]
73
+ (defn ^:private dcpath-reader
74
+ " EDN tagged-literal reader for dcpaths ."
75
+ [dcname ]
48
76
(let [remotes (seq (DistributedCache/getCacheFiles cser/*conf*))
49
- locals (seq (DistributedCache/getLocalCacheFiles cser/*conf*))
50
- _ (when (not= (count remotes) (count locals))
51
- (throw (ex-info " cache files do not match local files"
52
- {:remotes remotes, :locals locals})))
53
- entries (map ->entry remotes locals)
54
- cname->source (->> entries (remove nil?) (into {}))
55
- sources (map cname->source cnames)
56
- args (concat params sources)]
57
- (delay (apply readv args))))
77
+ locals (or (seq (DistributedCache/getLocalCacheFiles cser/*conf*))
78
+ (map unfragment remotes))]
79
+ (if (not= (count remotes) (count locals))
80
+ (throw (ex-info " cache files do not match local files"
81
+ {:remotes remotes, :locals locals}))
82
+ (->> (map vector remotes locals)
83
+ (pr/ffilter (fn [[^URI r]] (= dcname (.getFragment r))))
84
+ (resolve-source )
85
+ (dcpath* dcname)))))
86
+
87
+ ; ; Distributed value
88
+ (deftype DVal [value form]
89
+ Object
90
+ (toString [_]
91
+ (let [v (if (realized? value) @value :pending )]
92
+ (str " #<DVal: " (pr-str v) " >" )))
93
+ IDeref (deref [_] @value)
94
+ IPending (isRealized [_] (realized? value)))
58
95
59
96
(defmethod print-method DVal
60
- [^DVal dval ^Writer writer]
61
- (let [repr (if (nil? cser/*conf*)
62
- (str dval)
63
- (let [v (.-readv dval), p (.-params dval), dcm (.-dcm dval)
64
- literal (pr-str [v p (-> dcm keys vec)])]
65
- (fs/distcache! cser/*conf* dcm)
66
- (str " #parkour/dval " literal)))]
67
- (.write writer repr)))
97
+ [^DVal dval ^Writer w]
98
+ (if (nil? cser/*conf*)
99
+ (.write w (str dval))
100
+ (do
101
+ (.write w " #parkour/dval " )
102
+ (.write w (pr-str (.-form dval))))))
103
+
104
+ (defn ^:private dval-reader
105
+ " EDN tagged-literal reader for dvals."
106
+ [[f & args]] (delay (apply f args)))
107
+
108
+ (defn ^:private dval*
109
+ " Return a dval which locally proxies to `valref` and remotely will deserialize
110
+ as a delay over applying var `readv` to `args`."
111
+ [valref readv & args] (DVal. valref (cons readv args)))
112
+
113
+ (defn dval
114
+ " Return a dval which acts as a delay over applying var `readv` to `args`."
115
+ [readv & args] (apply dval* (delay (apply readv args)) readv args))
68
116
69
117
(defn ^:private identity-ref
70
118
" Return reference which yields `x` when `deref`ed."
71
- [x]
72
- (reify
73
- IDeref (deref [_] x)
74
- IPending (isRealized [_] true )))
75
-
76
- (defn ^:private cache-name
77
- " Generate distinct distcache-entry name for `source`."
78
- [source] (str (gensym " dval-" ) " -" (-> source fs/path .getName)))
119
+ [x] (reify IDeref (deref [_] x), IPending (isRealized [_] true )))
79
120
80
- (defn ^:private dval
81
- " Return a dval which locally holds `value` and remotely will deserialize by
82
- applying var `readv` to the concatenation of `params` and distributed copies of
83
- `sources`."
84
- ([value readv sources] (dval value readv nil sources))
85
- ([value readv params sources]
86
- (let [dcm (into {} (map (juxt cache-name fs/uri) sources))
87
- value (if (instance? IDeref value) value (identity-ref value))]
88
- (DVal. value readv params dcm))))
121
+ (defn value-dval
122
+ " Return a dval which locally holds `value` and remotely will deserialize as a
123
+ delay over applying var `readv` to `args`."
124
+ [value readv & args] (apply dval* (identity-ref value) readv args))
89
125
90
126
(defn load-dval
91
- " Return a dval which will deserialize by applying var `readv` to the
127
+ " Return a delay-like dval which will realize by applying var `readv` to the
92
128
concatenation of `params` and `sources` locally and distributed copies of
93
129
`sources` remotely."
94
130
([readv sources] (load-dval readv nil sources))
95
131
([readv params sources]
96
- (let [args (concat params sources)
97
- value (delay (apply readv args))]
98
- (dval value readv params sources))))
132
+ (apply dval readv (concat params (map dcpath sources)))))
99
133
100
134
(defn copy-dval
101
135
" Like `load-dval`, but first copy `sources` to transient locations."
@@ -114,7 +148,7 @@ deserialize by calling var `readv` with a distributed copy of the transient
114
148
serialization path."
115
149
[writef readv value]
116
150
(let [source (doto (transient-path ) (writef value))]
117
- (dval value readv [ source] )))
151
+ (value- dval value readv ( dcpath source) )))
118
152
119
153
(defn edn-dval
120
154
" EDN-serialize `value` to a transient location and yield a wrapping dval."
0 commit comments