@@ -3,18 +3,33 @@ package ai.chronon.api.planner
3
3
import ai .chronon .api .Extensions .{GroupByOps , MetadataOps , SourceOps , StringOps }
4
4
import ai .chronon .api .ScalaJavaConversions .{IterableOps , IteratorOps }
5
5
import ai .chronon .api ._
6
- import ai .chronon .api .planner .JoinOfflinePlanner ._
7
- import ai .chronon .api .planner .GroupByOfflinePlanner ._
8
6
import ai .chronon .planner ._
9
7
10
- import scala .collection .mutable
11
- import scala .language .{implicitConversions , reflectiveCalls }
12
8
import scala .collection .Seq
9
+ import scala .language .{implicitConversions , reflectiveCalls }
13
10
14
- class JoinOfflinePlanner (join : Join )(implicit outputPartitionSpec : PartitionSpec )
11
+ class JoinPlanner (join : Join )(implicit outputPartitionSpec : PartitionSpec )
15
12
extends Planner [Join ](join)(outputPartitionSpec) {
16
13
17
- val leftSourceNode : SourceWithFilterNode = {
14
+ // will mutate the join in place - use on deepCopy-ied objects only
15
// Strips metadata from `join` and from the objects nested under it, mutating the argument:
//   - the join's own metadata,
//   - the metadata of each join-part groupBy and each label-part groupBy (when those lists are set),
//   - the online external parts.
// Because the changes are applied in place, pass a deepCopy rather than a live config object.
private def unsetNestedMetadata(join: Join): Unit = {
  join.unsetMetaData()

  // Option(...) skips the traversal when the list is unset (null)
  val joinPartsOpt = Option(join.joinParts)
  joinPartsOpt.foreach { parts =>
    parts.iterator().toScala.foreach { part => part.groupBy.unsetMetaData() }
  }

  val labelPartsOpt = Option(join.labelParts)
  labelPartsOpt.foreach { labelParts =>
    labelParts.labels.iterator().toScala.foreach { label => label.groupBy.unsetMetaData() }
  }

  join.unsetOnlineExternalParts()
}
21
+
22
// Builds a deep copy of the planner's join with execution info removed from the join metadata
// and from the metadata of every join-part / label-part groupBy, and with online external
// parts unset. All mutation happens on the copy.
private def joinWithoutExecutionInfo: Join = {
  val stripped = join.deepCopy()
  stripped.metaData.unsetExecutionInfo()

  // the Option generator skips the traversal when the list is unset (null)
  for {
    parts <- Option(stripped.joinParts)
    part <- parts.iterator().toScala
  } part.groupBy.metaData.unsetExecutionInfo()

  for {
    labelParts <- Option(stripped.labelParts)
    label <- labelParts.labels.iterator().toScala
  } label.groupBy.metaData.unsetExecutionInfo()

  stripped.unsetOnlineExternalParts()
  stripped
}
30
+
31
+ val leftSourceNode : Node = {
32
+
18
33
val left = join.left
19
34
val result = new SourceWithFilterNode ()
20
35
.setSource(left)
@@ -34,12 +49,12 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
34
49
stepDays = Some (1 )
35
50
)
36
51
37
- result.setMetaData (metaData)
52
+ toNode (metaData, _.setSourceWithFilter(result), result )
38
53
}
39
54
40
- val bootstrapNodeOpt : Option [JoinBootstrapNode ] = Option (join.bootstrapParts).map { bootstrapParts =>
55
+ private val bootstrapNodeOpt : Option [Node ] = Option (join.bootstrapParts).map { bootstrapParts =>
41
56
val result = new JoinBootstrapNode ()
42
- .setJoin(join )
57
+ .setJoin(joinWithoutExecutionInfo )
43
58
44
59
// bootstrap tables are unfortunately unique to the join - can't be re-used if a new join part is added
45
60
val bootstrapNodeName = join.metaData.name + " /boostrap"
@@ -56,12 +71,25 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
56
71
stepDays = Some (1 )
57
72
)
58
73
59
- result.setMetaData(metaData)
74
+ val content = new NodeContent ()
75
+ content.setJoinBootstrap(result)
76
+
77
+ val copy = result.deepCopy()
78
+ unsetNestedMetadata(copy.join)
79
+
80
+ toNode(metaData, _.setJoinBootstrap(result), copy)
81
+ }
82
+
83
// Returns a deep copy of `joinPart` whose groupBy metadata has its execution info unset.
// The argument itself is not modified.
private def copyAndEraseExecutionInfo(joinPart: JoinPart): JoinPart = {
  val detached = joinPart.deepCopy()
  val groupByMetaData = detached.groupBy.metaData
  groupByMetaData.unsetExecutionInfo()
  detached
}
61
88
62
- private def buildJoinPartNode (joinPart : JoinPart ): JoinPartNode = {
89
+ private def buildJoinPartNode (joinPart : JoinPart ): Node = {
90
+
63
91
val result = new JoinPartNode ()
64
- .setJoinPart(joinPart)
92
+ .setJoinPart(copyAndEraseExecutionInfo( joinPart) )
65
93
.setLeftDataModel(join.left.dataModel)
66
94
.setLeftSourceTable(leftSourceNode.metaData.outputTable)
67
95
@@ -76,7 +104,7 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
76
104
.map(_.stepDays)
77
105
.getOrElse(joinPart.groupBy.dataModel match {
78
106
case DataModel .ENTITIES => 1
79
- case DataModel .EVENTS => 15
107
+ case DataModel .EVENTS => 15
80
108
})
81
109
82
110
// pull conf params from the groupBy metadata, but use the join namespace to write to.
@@ -90,12 +118,15 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
90
118
)
91
119
.setOutputNamespace(join.metaData.outputNamespace)
92
120
93
- result.setMetaData(metaData)
121
+ val copy = result.deepCopy()
122
+ copy.joinPart.groupBy.unsetMetaData()
123
+
124
+ toNode(metaData, _.setJoinPart(result), copy)
94
125
}
95
126
96
- private val joinPartNodes : Seq [JoinPartNode ] = join.joinParts.toScala.map { buildJoinPartNode }.toSeq
127
// One node per join part. `join.joinParts` is wrapped in Option, as it is elsewhere in this
// class, so a join with no join parts yields an empty sequence instead of a NullPointerException.
private val joinPartNodes: Seq[Node] =
  Option(join.joinParts).fold(Seq.empty[Node])(_.toScala.map(buildJoinPartNode).toSeq)
97
128
98
- val mergeNode : JoinMergeNode = {
129
+ val mergeNode : Node = {
99
130
val result = new JoinMergeNode ()
100
131
.setJoin(join)
101
132
@@ -124,10 +155,16 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
124
155
stepDays = Some (1 )
125
156
)
126
157
127
- result.setMetaData(metaData)
158
+ val copy = result.deepCopy()
159
+ unsetNestedMetadata(copy.join)
160
+ copy.join.unsetDerivations()
161
+ copy.join.unsetLabelParts()
162
+
163
+ toNode(metaData, _.setJoinMerge(result), copy)
128
164
}
129
165
130
- private val derivationNodeOpt : Option [JoinDerivationNode ] = Option (join.derivations).map { _ =>
166
+ private val derivationNodeOpt : Option [Node ] = Option (join.derivations).map { _ =>
167
+
131
168
val result = new JoinDerivationNode ()
132
169
.setJoin(join)
133
170
@@ -142,7 +179,11 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
142
179
stepDays = Some (1 )
143
180
)
144
181
145
- result.setMetaData(metaData)
182
+ val copy = result.deepCopy()
183
+ unsetNestedMetadata(copy.join)
184
+ copy.join.unsetLabelParts()
185
+
186
+ toNode(metaData, _.setJoinDerivation(result), copy)
146
187
}
147
188
148
189
// label parts that additionally require a groupBy backfill to generate their snapshot tables
@@ -156,7 +197,7 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
156
197
)
157
198
.getOrElse(Array .empty)
158
199
159
- private val labelJoinNodeOpt : Option [LabelJoinNode ] = Option (join.labelParts).map { labelParts =>
200
+ private val labelJoinNodeOpt : Option [Node ] = Option (join.labelParts).map { labelParts =>
160
201
val result = new LabelJoinNode ()
161
202
.setJoin(join)
162
203
@@ -177,112 +218,46 @@ class JoinOfflinePlanner(join: Join)(implicit outputPartitionSpec: PartitionSpec
177
218
stepDays = Some (1 )
178
219
)
179
220
180
- result.setMetaData(metaData )
181
- }
221
+ val copy = result.deepCopy( )
222
+ unsetNestedMetadata(result.join)
182
223
183
- override def offlineNodes : Seq [ PlanNode ] = {
184
- val result : mutable. ArrayBuffer [ PlanNode ] = mutable. ArrayBuffer .empty[ PlanNode ]
224
+ toNode(metaData, _.setLabelJoin(result), copy)
225
+ }
185
226
186
- result.append(leftSourceNode)
187
- bootstrapNodeOpt.foreach(bn => result.append(bn))
188
- joinPartNodes.foreach(jpn => result.append(jpn))
227
// Collects every offline node of this join into one sequence, in this order:
// left source, bootstrap (if any), join parts, merge, derivation (if any),
// a groupBy backfill node per snapshot label part, and the label join (if any).
def offlineNodes: Seq[Node] = {
  // each snapshot label part contributes the backfill node of its groupBy
  val labelSnapshotBackfills = snapshotLabelParts.map { labelPart =>
    new GroupByPlanner(labelPart.groupBy).backfillNode
  }

  val upToMerge = Seq(leftSourceNode) ++ bootstrapNodeOpt ++ joinPartNodes ++ Seq(mergeNode)

  upToMerge ++ derivationNodeOpt ++ labelSnapshotBackfills ++ labelJoinNodeOpt
}
189
239
190
- result.append(mergeNode)
191
- derivationNodeOpt.foreach(dn => result.append(dn))
192
- snapshotLabelParts.foreach(lp => result.append(lp.groupBy))
193
- labelJoinNodeOpt.foreach(ljn => result.append(ljn))
194
240
195
- result
241
// Online nodes are not implemented yet.
// A body containing only a comment evaluates to Unit and does not type-check against
// Seq[Node], so the explicit "not implemented" placeholder is kept until the nodes are built.
// Original note: "depends on all" - presumably on all of the offline nodes; confirm when implementing.
override def onlineNodes: Seq[Node] = ???
197
244
198
- override def onlineNodes : Seq [PlanNode ] = ???
245
+
246
// Assembles the plan for this join.
// The previous body called setNodes() with no argument, so the plan never received any nodes.
override def buildPlan: ConfPlan = {
  // NOTE(review): assumes ConfPlan.setNodes takes a java.util.List[Node] and returns the plan,
  // like the other chained setters used in this file - confirm against the ConfPlan definition.
  val planNodes = new java.util.ArrayList[Node]()
  offlineNodes.foreach(node => planNodes.add(node))

  // TODO: append onlineNodes once they are implemented
  new ConfPlan().setNodes(planNodes)
}
199
251
}
200
252
201
- object JoinOfflinePlanner {
253
object JoinPlanner {

  // Strips metadata from `join` and from the objects nested under it, mutating the argument:
  // the join's own metadata, the metadata of each join-part and label-part groupBy (when those
  // lists are set), and the online external parts.
  // Mutates in place - use on deepCopy-ied objects only.
  private def unsetNestedMetadata(join: Join): Unit = {
    join.unsetMetaData()

    // Option(...) skips the traversal when the list is unset (null)
    val joinPartsOpt = Option(join.joinParts)
    joinPartsOpt.foreach { parts =>
      parts.iterator().toScala.foreach { part => part.groupBy.unsetMetaData() }
    }

    val labelPartsOpt = Option(join.labelParts)
    labelPartsOpt.foreach { labelParts =>
      labelParts.labels.iterator().toScala.foreach { label => label.groupBy.unsetMetaData() }
    }

    join.unsetOnlineExternalParts()
  }

}
0 commit comments