Skip to content

Add compare_ip operator udfs #3821

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ public static SqlTypeName convertRelDataTypeToSqlTypeName(RelDataType type) {
case EXPR_DATE -> SqlTypeName.DATE;
case EXPR_TIME -> SqlTypeName.TIME;
case EXPR_TIMESTAMP -> SqlTypeName.TIMESTAMP;
case EXPR_IP -> SqlTypeName.VARCHAR;
// EXPR_IP is mapped to SqlTypeName.NULL since there is no
// corresponding SqlTypeName in Calcite. This is a workaround to allow
// type checking for IP types in UDFs.
case EXPR_IP -> SqlTypeName.NULL;
case EXPR_BINARY -> SqlTypeName.VARBINARY;
default -> type.getSqlTypeName();
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
import org.opensearch.sql.expression.function.udf.datetime.WeekFunction;
import org.opensearch.sql.expression.function.udf.datetime.WeekdayFunction;
import org.opensearch.sql.expression.function.udf.datetime.YearweekFunction;
import org.opensearch.sql.expression.function.udf.ip.CidrMatchFunction;
import org.opensearch.sql.expression.function.udf.ip.*;
import org.opensearch.sql.expression.function.udf.math.CRC32Function;
import org.opensearch.sql.expression.function.udf.math.ConvFunction;
import org.opensearch.sql.expression.function.udf.math.DivideFunction;
Expand Down Expand Up @@ -102,6 +102,14 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2");
public static final SqlOperator CIDRMATCH = new CidrMatchFunction().toUDF("CIDRMATCH");

// IP comparing functions
public static final SqlOperator NOT_EQUALS_IP = new NotEqualsIpFunction().toUDF("NOT_EQUALS_IP");
public static final SqlOperator EQUALS_IP = new EqualsIpFunction().toUDF("EQUALS_IP");
public static final SqlOperator GREATER_IP = new GreaterIpFunction().toUDF("GREATER_IP");
public static final SqlOperator GTE_IP = new GteIpFunction().toUDF("GTE_IP");
public static final SqlOperator LESS_IP = new LessIpFunction().toUDF("LESS_IP");
public static final SqlOperator LTE_IP = new LteIpFunction().toUDF("LTE_IP");

// Condition function
public static final SqlOperator EARLIEST = new EarliestFunction().toUDF("EARLIEST");
public static final SqlOperator LATEST = new LatestFunction().toUDF("LATEST");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,14 @@ public PPLTypeChecker getTypeChecker() {
}

void populate() {
// register operators for IP comparing
registerOperator(NOTEQUAL, PPLBuiltinOperators.NOT_EQUALS_IP);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: do we need to support IP as a UDT in the PPL engine?

In OpenSearch PPL, IP handling depends on index field type. CIDR-based filtering should use ip_range queries for ip fields, script pushdown / in-memory processing for keyword, text, and runtime string fields.

Field Type Use Case Expectation
IP Field search index=log ip="192.168.0.0/16" Rewrite as term query
  search index | where cidrmatch("192.168.0.0/16", ip) Rewrite as term query
Keyword Field search index=log ip="192.168.0.0/16" Rewrite as term query, exact keyword match
  search index | where cidrmatch("192.168.0.0/16", ip) Script pushdown — ip field is a string, not rewrite as term query.
Text Field search index=log ip="192.168.0.0/16" Rewrite as query_string query, full text search
  search index | where cidrmatch("192.168.0.0/16", ip) Script pushdown — ip field is a string, not rewrite as term query.
Runtime Field search index=log | parse ip=regex(...)| where ip="192.168.0.0/16" Script pushdown — ip field is a string, so it is a string comparison query
  search index=log | parse ip=regex(...)| where cidrmatch("192.168.0.0/16", ip) Script pushdown — ip field is a string, not rewrite as term query.

registerOperator(EQUAL, PPLBuiltinOperators.EQUALS_IP);
registerOperator(GREATER, PPLBuiltinOperators.GREATER_IP);
registerOperator(GTE, PPLBuiltinOperators.GTE_IP);
registerOperator(LESS, PPLBuiltinOperators.LESS_IP);
registerOperator(LTE, PPLBuiltinOperators.LTE_IP);

// Register std operator
registerOperator(AND, SqlStdOperatorTable.AND);
registerOperator(OR, SqlStdOperatorTable.OR);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ private static List<ExprType> getExprTypes(SqlTypeFamily family) {
OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER));
case ANY, IGNORE -> List.of(
OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.ANY));
// We borrow SqlTypeFamily.NULL to represent EXPR_IP. This is a workaround
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we stop using CompositeOperandTypeChecker and create a new TypeChecker ourselves, maybe we can avoid using SqlTypeFamily.NULL to represent EXPR_IP and check operands at the RelDataType level.

// since there is no corresponding IP type family in Calcite.
case NULL -> List.of(
OpenSearchTypeFactory.TYPE_FACTORY.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_IP));
default -> {
RelDataType type = family.getDefaultConcreteType(OpenSearchTypeFactory.TYPE_FACTORY);
if (type == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
Expand Down Expand Up @@ -44,9 +42,12 @@ public SqlReturnTypeInference getReturnTypeInference() {

@Override
public UDFOperandMetadata getOperandMetadata() {
// EXPR_IP is mapped to SqlTypeFamily.VARCHAR in
// EXPR_IP is mapped to SqlTypeFamily.NULL in
// UserDefinedFunctionUtils.convertRelDataTypeToSqlTypeName
return UDFOperandMetadata.wrap(OperandTypes.STRING_STRING);
return UDFOperandMetadata.wrap(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should use a new PPLTypeChecker here and override its checkOperandTypes method to accept only string or ip operands.

(CompositeOperandTypeChecker)
OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)
.or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING)));
}

public static class CidrMatchImplementor implements NotNullImplementor {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.ip;

import inet.ipaddr.IPAddress;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;
import org.opensearch.sql.utils.IPUtils;

/**
 * UDF that tests whether two IP addresses are equal.
 *
 * <p>Operand combinations declared by {@link #getOperandMetadata()}; IP operands are matched
 * through {@code SqlTypeFamily.NULL}, which this project borrows as the stand-in family for
 * EXPR_IP:
 *
 * <ul>
 *   <li>(STRING, IP) -> BOOLEAN
 *   <li>(IP, STRING) -> BOOLEAN
 *   <li>(IP, IP) -> BOOLEAN
 * </ul>
 *
 * <p>(STRING, STRING) is not among the operand families declared here. The String/String overload
 * in {@link EqualsImplementor} is the shared routine that the other overloads delegate to.
 */
public class EqualsIpFunction extends ImplementorUDF {
  /** Creates the UDF backed by {@link EqualsImplementor}, registered with {@code NullPolicy.ANY}. */
  public EqualsIpFunction() {
    super(new EqualsImplementor(), NullPolicy.ANY);
  }

  /** Result type inference is {@code ReturnTypes.BOOLEAN_FORCE_NULLABLE}. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
  }

  /**
   * Declares the accepted operand family pairs: (STRING, NULL), (NULL, STRING) and (NULL, NULL).
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return UDFOperandMetadata.wrap(
        (CompositeOperandTypeChecker)
            OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL)
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL)));
  }

  /** Code-generation hook that routes the call to the static {@code Equals} overloads below. */
  public static class EqualsImplementor implements NotNullImplementor {
    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // The target is referenced by name only, so the static methods must stay named "Equals".
      return Expressions.call(EqualsImplementor.class, "Equals", translatedOperands);
    }

    /**
     * Compares two textual addresses.
     *
     * @param ip1 left-hand address text
     * @param ip2 right-hand address text
     * @return {@code true} when {@code IPUtils.compare} of the two converted addresses is 0;
     *     {@code false} otherwise, including when a {@link SemanticCheckException} is raised
     */
    public static boolean Equals(String ip1, String ip2) {
      try {
        return IPUtils.compare(IPUtils.toAddress(ip1), IPUtils.toAddress(ip2)) == 0;
      } catch (SemanticCheckException ignored) {
        // A SemanticCheckException during conversion or comparison is reported as "not equal".
        return false;
      }
    }

    /** Unwraps the right operand via {@code value()} and defers to the String/String overload. */
    public static boolean Equals(String ip1, ExprIpValue ip2) {
      final String right = ip2.value();
      return Equals(ip1, right);
    }

    /** Unwraps the left operand via {@code value()} and defers to the String/String overload. */
    public static boolean Equals(ExprIpValue ip1, String ip2) {
      final String left = ip1.value();
      return Equals(left, ip2);
    }

    /** Unwraps both operands via {@code value()} and defers to the String/String overload. */
    public static boolean Equals(ExprIpValue ip1, ExprIpValue ip2) {
      final String left = ip1.value();
      final String right = ip2.value();
      return Equals(left, right);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.ip;

import inet.ipaddr.IPAddress;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;
import org.opensearch.sql.utils.IPUtils;

/**
 * UDF that tests whether the first IP address is strictly greater than the second.
 *
 * <p>Operand combinations declared by {@link #getOperandMetadata()}; IP operands are matched
 * through {@code SqlTypeFamily.NULL}, which this project borrows as the stand-in family for
 * EXPR_IP:
 *
 * <ul>
 *   <li>(STRING, IP) -> BOOLEAN
 *   <li>(IP, STRING) -> BOOLEAN
 *   <li>(IP, IP) -> BOOLEAN
 * </ul>
 *
 * <p>(STRING, STRING) is not among the operand families declared here. The String/String overload
 * in {@link GreaterImplementor} is the shared routine that the other overloads delegate to.
 */
public class GreaterIpFunction extends ImplementorUDF {
  /**
   * Creates the UDF backed by {@link GreaterImplementor}, registered with {@code NullPolicy.ANY}.
   */
  public GreaterIpFunction() {
    super(new GreaterImplementor(), NullPolicy.ANY);
  }

  /** Result type inference is {@code ReturnTypes.BOOLEAN_FORCE_NULLABLE}. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
  }

  /**
   * Declares the accepted operand family pairs: (STRING, NULL), (NULL, STRING) and (NULL, NULL).
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return UDFOperandMetadata.wrap(
        (CompositeOperandTypeChecker)
            OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL)
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL)));
  }

  /** Code-generation hook that routes the call to the static {@code Greater} overloads below. */
  public static class GreaterImplementor implements NotNullImplementor {
    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // The target is referenced by name only, so the static methods must stay named "Greater".
      return Expressions.call(GreaterImplementor.class, "Greater", translatedOperands);
    }

    /**
     * Compares two textual addresses.
     *
     * @param ip1 left-hand address text
     * @param ip2 right-hand address text
     * @return {@code true} when {@code IPUtils.compare} of the two converted addresses is positive;
     *     {@code false} otherwise, including when a {@link SemanticCheckException} is raised
     */
    public static boolean Greater(String ip1, String ip2) {
      try {
        return IPUtils.compare(IPUtils.toAddress(ip1), IPUtils.toAddress(ip2)) > 0;
      } catch (SemanticCheckException ignored) {
        // A SemanticCheckException during conversion or comparison is reported as "not greater".
        return false;
      }
    }

    /** Unwraps the right operand via {@code value()} and defers to the String/String overload. */
    public static boolean Greater(String ip1, ExprIpValue ip2) {
      final String right = ip2.value();
      return Greater(ip1, right);
    }

    /** Unwraps the left operand via {@code value()} and defers to the String/String overload. */
    public static boolean Greater(ExprIpValue ip1, String ip2) {
      final String left = ip1.value();
      return Greater(left, ip2);
    }

    /** Unwraps both operands via {@code value()} and defers to the String/String overload. */
    public static boolean Greater(ExprIpValue ip1, ExprIpValue ip2) {
      final String left = ip1.value();
      final String right = ip2.value();
      return Greater(left, right);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.ip;

import inet.ipaddr.IPAddress;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;
import org.opensearch.sql.utils.IPUtils;

/**
 * UDF that tests whether the first IP address is greater than or equal to the second.
 *
 * <p>Operand combinations declared by {@link #getOperandMetadata()}; IP operands are matched
 * through {@code SqlTypeFamily.NULL}, which this project borrows as the stand-in family for
 * EXPR_IP:
 *
 * <ul>
 *   <li>(STRING, IP) -> BOOLEAN
 *   <li>(IP, STRING) -> BOOLEAN
 *   <li>(IP, IP) -> BOOLEAN
 * </ul>
 *
 * <p>(STRING, STRING) is not among the operand families declared here. The String/String overload
 * in {@link GteImplementor} is the shared routine that the other overloads delegate to.
 */
public class GteIpFunction extends ImplementorUDF {
  /** Creates the UDF backed by {@link GteImplementor}, registered with {@code NullPolicy.ANY}. */
  public GteIpFunction() {
    super(new GteImplementor(), NullPolicy.ANY);
  }

  /** Result type inference is {@code ReturnTypes.BOOLEAN_FORCE_NULLABLE}. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
  }

  /**
   * Declares the accepted operand family pairs: (STRING, NULL), (NULL, STRING) and (NULL, NULL).
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return UDFOperandMetadata.wrap(
        (CompositeOperandTypeChecker)
            OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL)
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL)));
  }

  /** Code-generation hook that routes the call to the static {@code Gte} overloads below. */
  public static class GteImplementor implements NotNullImplementor {
    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // The target is referenced by name only, so the static methods must stay named "Gte".
      return Expressions.call(GteImplementor.class, "Gte", translatedOperands);
    }

    /**
     * Compares two textual addresses.
     *
     * @param ip1 left-hand address text
     * @param ip2 right-hand address text
     * @return {@code true} when {@code IPUtils.compare} of the two converted addresses is zero or
     *     positive; {@code false} otherwise, including when a {@link SemanticCheckException} is
     *     raised
     */
    public static boolean Gte(String ip1, String ip2) {
      try {
        return IPUtils.compare(IPUtils.toAddress(ip1), IPUtils.toAddress(ip2)) >= 0;
      } catch (SemanticCheckException ignored) {
        // A SemanticCheckException during conversion or comparison is reported as false.
        return false;
      }
    }

    /** Unwraps the right operand via {@code value()} and defers to the String/String overload. */
    public static boolean Gte(String ip1, ExprIpValue ip2) {
      final String right = ip2.value();
      return Gte(ip1, right);
    }

    /** Unwraps the left operand via {@code value()} and defers to the String/String overload. */
    public static boolean Gte(ExprIpValue ip1, String ip2) {
      final String left = ip1.value();
      return Gte(left, ip2);
    }

    /** Unwraps both operands via {@code value()} and defers to the String/String overload. */
    public static boolean Gte(ExprIpValue ip1, ExprIpValue ip2) {
      final String left = ip1.value();
      final String right = ip2.value();
      return Gte(left, right);
    }
  }
}
Loading
Loading