|
4 | 4 | [clojure.core.reducers :as r]
|
5 | 5 | [clojure.core.protocols :as ccp]
|
6 | 6 | [clojure.string :as str]
|
| 7 | + [clojure.reflect :as reflect] |
| 8 | + [parkour.writable :as w] |
7 | 9 | [parkour.util :refer [returning]]
|
8 | 10 | [parkour.reducers :as pr])
|
9 | 11 | (:import [java.util Comparator]
|
10 | 12 | [clojure.lang IPersistentCollection]
|
| 13 | + [org.apache.hadoop.conf Configuration] |
11 | 14 | [org.apache.hadoop.io NullWritable]
|
12 | 15 | [org.apache.hadoop.mapreduce
|
13 |
| - MapContext ReduceContext TaskInputOutputContext])) |
| 16 | + Job MapContext ReduceContext TaskInputOutputContext])) |
14 | 17 |
|
15 | 18 | (defprotocol MRSource
|
16 | 19 | (keyvals [source] "")
|
|
130 | 133 | (returning sink (.write sink (NullWritable/get) val)))
|
131 | 134 |
|
132 | 135 | IPersistentCollection
|
133 |
| - (emit-keyval [sink keyval] (conj sink keyval)) |
134 |
| - (emit-key [sink key] (conj sink [key nil])) |
135 |
| - (emit-val [sink val] (conj sink [nil val]))) |
| 136 | + (emit-keyval [sink keyval] (conj sink (mapv w/clone keyval))) |
| 137 | + (emit-key [sink key] (conj sink [(w/clone key) nil])) |
| 138 | + (emit-val [sink val] (conj sink [nil (w/clone val)]))) |
| 139 | + |
(def ^:private job-factory-method?
  "Truthy when this Hadoop version's `Job` class exposes the static
`getInstance` factory method (Hadoop 2.x+); nil under older Hadoop,
where only the deprecated constructor exists."
  (->> (reflect/type-reflect Job)
       :members
       (some (fn [member] (= 'getInstance (:name member))))))
| 142 | + |
(defmacro ^:private make-job
  "Expand to a `Job` construction form, choosing at macro-expansion time
between the Hadoop 2 `Job/getInstance` factory and the deprecated
Hadoop 1 constructor, based on `job-factory-method?`."
  [& args]
  (let [ctor (if job-factory-method? `Job/getInstance `Job.)]
    `(~ctor ~@args)))
| 145 | + |
(defn job
  "Return a fresh `Job` instance. With no arguments, builds one with a
default configuration. Given an existing `Job`, copies that job's
configuration into the new instance; given a `Configuration`, uses it
directly."
  {:tag `Job}
  ([] (make-job))
  ([conf]
   (if-not (instance? Job conf)
     (make-job ^Configuration conf)
     ;; Copy the configuration so the new job does not share (and
     ;; mutate) the original job's underlying Configuration.
     (-> ^Job conf .getConfiguration Configuration. make-job))))
| 153 | + |
(defn mapper!
  "Allocate and return the next generated parkour mapper class for
`job`. The mapper will invoke the function held by `var`, applied to
`args`; both are serialized into the job configuration with `pr-str`
under index-keyed properties, and the per-job mapper counter is
advanced."
  [^Job job var & args]
  (let [conf (.getConfiguration job)
        counter-key "parkour.mapper.next"
        i (.getInt conf counter-key 0)]
    (.setInt conf counter-key (inc i))
    (.set conf (format "parkour.mapper.%d.var" i) (pr-str var))
    (.set conf (format "parkour.mapper.%d.args" i) (pr-str args))
    (Class/forName (format "parkour.hadoop.Mappers$_%d" i))))
| 163 | + |
(defn reducer!
  "Allocate and return the next generated parkour reducer class for
`job`. The reducer will invoke the function held by `var`, applied to
`args`; both are serialized into the job configuration with `pr-str`
under index-keyed properties, and the per-job reducer counter is
advanced."
  [^Job job var & args]
  (let [conf (.getConfiguration job)
        counter-key "parkour.reducer.next"
        i (.getInt conf counter-key 0)]
    (.setInt conf counter-key (inc i))
    (.set conf (format "parkour.reducer.%d.var" i) (pr-str var))
    (.set conf (format "parkour.reducer.%d.args" i) (pr-str args))
    (Class/forName (format "parkour.hadoop.Reducers$_%d" i))))
| 173 | + |
(defn partitioner!
  "Configure `job` to partition via the function held by `var`, applied
to `args`; both are serialized into the job configuration with
`pr-str`. Returns the parkour partitioner class backing the
configuration."
  [^Job job var & args]
  (let [conf (.getConfiguration job)]
    (.set conf "parkour.partitioner.var" (pr-str var))
    (.set conf "parkour.partitioner.args" (pr-str args))
    parkour.hadoop.Partitioner))
| 181 | + |
(defn set-mapper-var
  "Install the function held by `var` (applied to `args`) as `job`'s
mapper. When the var's metadata carries an `::output` pair of key and
value classes, additionally set the job's map-output key/value classes
from it."
  [^Job job var & args]
  (.setMapperClass job (apply mapper! job var args))
  (let [[okey oval] (::output (meta var))]
    (when okey (.setMapOutputKeyClass job okey))
    (when oval (.setMapOutputValueClass job oval))))
| 188 | + |
(defn set-combiner-var
  "Install the function held by `var` (applied to `args`) as `job`'s
combiner."
  [^Job job var & args]
  (->> (apply reducer! job var args)
       (.setCombinerClass job)))
| 192 | + |
(defn set-reducer-var
  "Install the function held by `var` (applied to `args`) as `job`'s
reducer. When the var's metadata carries an `::output` pair of key and
value classes, additionally set the job's final output key/value
classes from it."
  [^Job job var & args]
  (.setReducerClass job (apply reducer! job var args))
  (let [[okey oval] (::output (meta var))]
    (when okey (.setOutputKeyClass job okey))
    (when oval (.setOutputValueClass job oval))))
0 commit comments