-
Notifications
You must be signed in to change notification settings - Fork 63
Adds support for FULL/RIGHT OUTER JOIN and improves performance of JOINs #1295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9d03b03
c9a6316
4fc9dc3
c033453
ed21f96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package org.partiql.eval.internal.operator.rel | ||
|
||
import org.partiql.eval.internal.Record | ||
import org.partiql.eval.internal.operator.Operator | ||
import org.partiql.value.BoolValue | ||
import org.partiql.value.PartiQLValue | ||
import org.partiql.value.PartiQLValueExperimental | ||
import org.partiql.value.StructValue | ||
import org.partiql.value.nullValue | ||
import org.partiql.value.structValue | ||
|
||
/**
 * Shared nested-loop driver for join operators.
 *
 * Iteration order: the current [rhs] row is held in [rhsRecord] while [lhs] is scanned from start to end;
 * when [lhs] runs out, it is closed, [rhs] is advanced by one row, and [lhs] is re-opened for another scan.
 * For every (lhs row, rhs row) pair the [condition] is evaluated against the concatenated record and the
 * outcome is handed to [getOutputRecord], which decides what (if anything) is emitted for that pair.
 *
 * If [rhs] produces no rows at all, [next] returns `null` immediately.
 */
internal abstract class RelJoinNestedLoop : Operator.Relation {

    abstract val lhs: Operator.Relation
    abstract val rhs: Operator.Relation
    abstract val condition: Operator.Expr

    /** The [rhs] row currently being paired with [lhs] rows; `null` when [rhs] is empty or exhausted. */
    private var rhsRecord: Record? = null

    /** Opens both inputs and positions [rhs] on its first row. */
    override fun open() {
        lhs.open()
        rhs.open()
        rhsRecord = rhs.next()
    }

    /**
     * Builds the output for a single (lhs, rhs) pair.
     *
     * @param result whether [condition] evaluated to boolean `true` for the pair.
     * @param lhs the current row taken from [RelJoinNestedLoop.lhs].
     * @param rhs the current row taken from [RelJoinNestedLoop.rhs].
     * @return the record to emit, or `null` to skip this pair and move on to the next one.
     */
    @OptIn(PartiQLValueExperimental::class)
    abstract fun getOutputRecord(result: Boolean, lhs: Record, rhs: Record): Record?

    /**
     * Returns the next joined record, or `null` once every pair has been visited.
     *
     * Each loop iteration pulls a fresh [lhs] row, so a pair for which [getOutputRecord] returns `null`
     * is skipped rather than being re-evaluated indefinitely.
     */
    @OptIn(PartiQLValueExperimental::class)
    override fun next(): Record? {
        while (true) {
            // Without a current RHS row (empty or exhausted RHS) there is nothing left to pair.
            val currentRhs = rhsRecord ?: return null
            val currentLhs = lhs.next()
            if (currentLhs == null) {
                // LHS scan finished for this RHS row: advance RHS and restart the LHS scan.
                lhs.close()
                val nextRhs = rhs.next()
                // Store the value even when it is null so that later calls keep returning null.
                rhsRecord = nextRhs
                if (nextRhs == null) {
                    return null
                }
                lhs.open()
                continue
            }
            val matched = condition.eval(currentLhs + currentRhs).isTrue()
            val output = getOutputRecord(matched, currentLhs, currentRhs)
            if (output != null) {
                return output
            }
        }
    }

    /** Closes both inputs. */
    override fun close() {
        lhs.close()
        rhs.close()
    }

    /** `true` only for a [BoolValue] whose value is `true`; null/missing/non-boolean values count as not true. */
    @OptIn(PartiQLValueExperimental::class)
    private fun PartiQLValue.isTrue(): Boolean {
        return this is BoolValue && this.value == true
    }

    /** Replaces, in place, every value of this record with its null-padded form. */
    @OptIn(PartiQLValueExperimental::class)
    internal fun Record.padNull() {
        for (index in this.values.indices) {
            this.values[index] = this.values[index].padNull()
        }
    }

    /**
     * Null-padded form of a value: a struct keeps its field names with every field value replaced by null;
     * any other value becomes a plain null.
     */
    @OptIn(PartiQLValueExperimental::class)
    private fun PartiQLValue.padNull(): PartiQLValue {
        return when (this) {
            is StructValue<*> -> {
                val newFields = this.fields?.map { it.first to nullValue() }
                structValue(newFields)
            }
            else -> nullValue()
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package org.partiql.eval.internal.operator.rel | ||
|
||
import org.partiql.eval.internal.Record | ||
import org.partiql.eval.internal.operator.Operator | ||
|
||
/**
 * Nested-loop FULL OUTER JOIN layered on top of [RelJoinNestedLoop].
 *
 * For each (LHS, RHS) pair that the base class passes to [getOutputRecord]:
 * - Condition is true: the two records are concatenated unchanged (same as INNER JOIN).
 * - Condition is false: two rows are emitted across two consecutive calls to [next].
 *   1. Copies of both records are stashed in [previousLhs] / [previousRhs], the LHS record is null-padded
 *      in place, and the padded LHS concatenated with the RHS is returned (the RIGHT OUTER JOIN shape).
 *   2. On the following call to [next], the stashed RHS copy is null-padded and returned concatenated
 *      after the stashed LHS copy (the LEFT OUTER JOIN shape); the stash is then cleared.
 *
 * Performance, per the original author's analysis with [lhs] of size M and [rhs] of size N:
 * - Time: O(M * N)
 * - Space: O(1) (only the single stashed pair is retained by this class)
 */
internal class RelJoinOuterFull(
    override val lhs: Operator.Relation,
    override val rhs: Operator.Relation,
    override val condition: Operator.Expr
) : RelJoinNestedLoop() {

    /** Stashed copy of the LHS record whose RHS-padded row is still owed; `null` when nothing is pending. */
    private var previousLhs: Record? = null

    /** Stashed copy of the RHS record belonging to [previousLhs]; `null` when nothing is pending. */
    private var previousRhs: Record? = null

    /**
     * Emits the owed RHS-padded row if one is pending, otherwise defers to the base nested-loop iteration.
     */
    override fun next(): Record? {
        val pendingLhs = previousLhs
        val pendingRhs = previousRhs
        if (pendingLhs == null || pendingRhs == null) {
            return super.next()
        }
        // NOTE(review): a reviewer asked whether the join condition should be checked on this path.
        // As written, the pair is only stashed by getOutputRecord when the condition was not true,
        // and it is emitted here without evaluating the condition again.
        pendingRhs.padNull()
        val joined = pendingLhs + pendingRhs
        previousLhs = null
        previousRhs = null
        return joined
    }

    /**
     * For FULL OUTER JOIN: when the join condition ([result]) is true, the rows are merged unmodified.
     * When it is false, copies of both rows are stashed for the follow-up row produced by [next],
     * [lhs] is null-padded in place, and the padded [lhs] merged with [rhs] is returned.
     */
    override fun getOutputRecord(result: Boolean, lhs: Record, rhs: Record): Record {
        if (result) {
            previousLhs = null
            previousRhs = null
        } else {
            // Copy before padding so the stashed LHS is taken prior to the in-place mutation below.
            previousLhs = lhs.copy()
            previousRhs = rhs.copy()
            lhs.padNull()
        }
        return lhs + rhs
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package org.partiql.eval.internal.operator.rel | ||
|
||
import org.partiql.eval.internal.Record | ||
import org.partiql.eval.internal.operator.Operator | ||
|
||
/**
 * RIGHT OUTER JOIN expressed through [RelJoinNestedLoop] by swapping the two inputs: the constructor's
 * `rhs` argument is exposed as [lhs] and the constructor's `lhs` argument is exposed as [rhs].
 *
 * NOTE(review): because of the swap, the record passed to [getOutputRecord] as `lhs` originates from the
 * constructor's `rhs` relation, and the emitted record places those values first. Confirm that both the
 * padded side and the resulting column order match the intended RIGHT OUTER JOIN semantics.
 */
internal class RelJoinRight(
    lhs: Operator.Relation,
    rhs: Operator.Relation,
    override val condition: Operator.Expr
) : RelJoinNestedLoop() {

    // In these initializers the names on the right-hand side refer to the constructor parameters.
    override val lhs: Operator.Relation = rhs
    override val rhs: Operator.Relation = lhs

    /**
     * Concatenates [lhs] and [rhs]; when the join condition ([result]) did not hold, [lhs] is first
     * null-padded in place.
     */
    override fun getOutputRecord(result: Boolean, lhs: Record, rhs: Record): Record {
        if (!result) {
            lhs.padNull()
        }
        return lhs + rhs
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
join
https://www.youtube.com/watch?v=PEgk2v6KntY