Skip to content

Draft: feat(java): row encoder array deserialization into List supports lazy access #2358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,15 @@ public ArrayDataForEach(
super(inputArrayData);
Preconditions.checkArgument(getRawType(inputArrayData.type()) == BinaryArray.class);
this.inputArrayData = inputArrayData;
TypeRef<?> accessType;
CustomTypeHandler customTypeHandler = CustomTypeEncoderRegistry.customTypeHandler();
CustomCodec<?, ?> customEncoder =
CustomTypeEncoderRegistry.customTypeHandler()
.findCodec(BinaryArray.class, elemType.getRawType());
customTypeHandler.findCodec(BinaryArray.class, elemType.getRawType());
TypeRef<?> accessType;
if (customEncoder == null) {
accessType = elemType;
} else {
accessType = customEncoder.encodedType();
}
CustomTypeHandler customTypeHandler = CustomTypeEncoderRegistry.customTypeHandler();
TypeResolutionContext ctx = new TypeResolutionContext(customTypeHandler, true);
this.accessMethod = BinaryUtils.getElemAccessMethodName(accessType, ctx);
this.elemType = BinaryUtils.getElemReturnType(accessType, ctx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
Expand Down Expand Up @@ -680,9 +681,16 @@ protected Expression deserializeForMap(Expression mapData, TypeRef<?> typeRef) {

/** Returns an expression that deserialize <code>arrayData</code> as a java collection. */
protected Expression deserializeForCollection(Expression arrayData, TypeRef<?> typeRef) {
TypeRef<?> elemType = TypeUtils.getElementType(typeRef);
if (typeRef.getRawType() == List.class) {
return new LazyArrayData(
arrayData,
elemType,
(i, value) -> deserializeFor(value, elemType, typeCtx, new HashSet<>()),
ExpressionUtils.nullValue(elemType));
}
Expression collection = newCollection(arrayData, typeRef);
try {
TypeRef<?> elemType = TypeUtils.getElementType(typeRef);
ArrayDataForEach addElemsOp =
new ArrayDataForEach(
arrayData,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fory.format.encoder;

import java.util.AbstractList;
import java.util.RandomAccess;
import org.apache.fory.annotation.Internal;
import org.apache.fory.format.row.binary.BinaryArray;

@Internal
public abstract class LazyArray<E> extends AbstractList<E> implements RandomAccess {
protected final BinaryArray array;
private final Object[] storage;

public LazyArray(final BinaryArray array) {
this.array = array;
storage = new Object[array.numElements()];
}

@Override
public int size() {
return array.numElements();
}

@SuppressWarnings("unchecked")
@Override
public E get(final int index) {
Object value = storage[index];
if (value == null) {
value = deserialize(index);
if (value != null) {
storage[index] = value;
}
}
return (E) value;
}

protected abstract Object deserialize(int index);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fory.format.encoder;

import static org.apache.fory.type.TypeUtils.getRawType;

import java.awt.List;
import org.apache.fory.annotation.Internal;
import org.apache.fory.codegen.ClosureVisitable;
import org.apache.fory.codegen.Code;
import org.apache.fory.codegen.CodeGenerator;
import org.apache.fory.codegen.CodegenContext;
import org.apache.fory.codegen.Expression;
import org.apache.fory.codegen.Expression.AbstractExpression;
import org.apache.fory.format.row.binary.BinaryArray;
import org.apache.fory.format.row.binary.BinaryUtils;
import org.apache.fory.format.type.CustomTypeEncoderRegistry;
import org.apache.fory.format.type.CustomTypeHandler;
import org.apache.fory.reflect.TypeRef;
import org.apache.fory.type.TypeResolutionContext;
import org.apache.fory.type.TypeUtils;
import org.apache.fory.util.Preconditions;
import org.apache.fory.util.StringUtils;
import org.apache.fory.util.function.SerializableBiFunction;

/**
* Expression to represent {@link org.apache.fory.format.row.ArrayData} as a lazy List implemented
* with array backing storage.
*/
@Internal
public class LazyArrayData extends AbstractExpression {
private final Expression inputArrayData;
private final String accessMethod;
private final TypeRef<?> elemType;

@ClosureVisitable
private final SerializableBiFunction<Expression, Expression, Expression> notNullAction;

@ClosureVisitable private final Expression nullValue;

/**
* inputArrayData.type() must be multi-dimension array or Collection, not allowed to be primitive
* array
*/
public LazyArrayData(
Expression inputArrayData,
TypeRef<?> elemType,
SerializableBiFunction<Expression, Expression, Expression> notNullAction) {
this(inputArrayData, elemType, notNullAction, null);
}

/**
* inputArrayData.type() must be multi-dimension array or Collection, not allowed to be primitive
* array
*/
public LazyArrayData(
Expression inputArrayData,
TypeRef<?> elemType,
SerializableBiFunction<Expression, Expression, Expression> notNullAction,
Expression nullValue) {
super(inputArrayData);
Preconditions.checkArgument(getRawType(inputArrayData.type()) == BinaryArray.class);
this.inputArrayData = inputArrayData;
CustomTypeHandler customTypeHandler = CustomTypeEncoderRegistry.customTypeHandler();
CustomCodec<?, ?> customEncoder =
customTypeHandler.findCodec(BinaryArray.class, elemType.getRawType());
TypeRef<?> accessType;
if (customEncoder == null) {
accessType = elemType;
} else {
accessType = customEncoder.encodedType();
}
TypeResolutionContext ctx = new TypeResolutionContext(customTypeHandler, true);
this.accessMethod = BinaryUtils.getElemAccessMethodName(accessType, ctx);
this.elemType = BinaryUtils.getElemReturnType(accessType, ctx);
this.notNullAction = notNullAction;
this.nullValue = nullValue;
}

@Override
public TypeRef<?> type() {
return TypeUtils.listOf(elemType.getRawType());
}

@Override
public Code.ExprCode doGenCode(CodegenContext ctx) {
StringBuilder codeBuilder = new StringBuilder();
Code.ExprCode targetExprCode = inputArrayData.genCode(ctx);
if (StringUtils.isNotBlank(targetExprCode.code())) {
codeBuilder.append(targetExprCode.code()).append("\n");
}
Code.ExprCode notNullElemExprCode =
new Expression.Return(
notNullAction.apply(new Reference("index"), new Reference("elemValue")))
.genCode(ctx);
Code.ExprCode nullElemCode = nullValue.genCode(ctx);
String result = ctx.newName("lazyArray");
String code =
StringUtils.format(
""
+ "java.util.List ${result} = \n"
+ " new org.apache.fory.format.encoder.LazyArray(${arr}) {\n"
+ " protected Object deserialize(int index) {\n"
+ " if (!array.isNullAt(index)) {\n"
+ " ${elemType} elemValue = array.${method}(index);\n"
+ " ${notNullElemExprCode}\n"
+ " } else {\n"
+ " return ${nullElemCode};\n"
+ " }\n"
+ " }\n"
+ " };",
"result",
result,
"arr",
targetExprCode.value(),
"elemType",
ctx.type(elemType),
"method",
accessMethod,
"notNullElemExprCode",
CodeGenerator.alignIndent(notNullElemExprCode.code(), 8),
"nullElemCode",
nullElemCode.value());
codeBuilder.append(code);
return new Code.ExprCode(codeBuilder.toString(), null, Code.variable(List.class, result));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@
import java.util.Optional;
import java.util.TreeSet;
import lombok.Data;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.fory.annotation.ForyField;
import org.apache.fory.format.row.binary.BinaryArray;
import org.apache.fory.format.row.binary.BinaryRow;
import org.apache.fory.format.type.DataTypes;
import org.apache.fory.memory.MemoryBuffer;
import org.apache.fory.memory.MemoryUtils;
import org.apache.fory.reflect.TypeRef;
Expand Down Expand Up @@ -81,6 +83,7 @@ public ImplementNestedType(final String f3) {
static {
Encoders.registerCustomCodec(PoisonPill.class, new PoisonPillCodec());
Encoders.registerCustomCodec(Id.class, new IdCodec());
Encoders.registerCustomCodec(ListLazyElemInner.class, new ListLazyElemInnerCodec());
}

@Test
Expand Down Expand Up @@ -298,4 +301,79 @@ public void testTreeSetOfInterface() {
final TreeSet<Value> deserializedBean = encoder.fromArray(array);
Assert.assertEquals(deserializedBean, expected);
}

public static class ListLazyElemInner {
private final int f1;
static boolean check;

ListLazyElemInner(final int f1) {
if (check) {
Assert.assertEquals(f1, 42);
}
this.f1 = f1;
}

public int f1() {
return f1;
}
}

static class ListLazyElemInnerCodec implements CustomCodec<ListLazyElemInner, Integer> {
@Override
public Field getField(final String fieldName) {
return DataTypes.field(fieldName, DataTypes.int32());
}

@Override
public TypeRef<Integer> encodedType() {
return TypeRef.of(Integer.class);
}

@Override
public Integer encode(final ListLazyElemInner value) {
return value.f1();
}

@Override
public ListLazyElemInner decode(final Integer value) {
return new ListLazyElemInner(value);
}
}

public interface ListLazyElemOuter {
List<ListLazyElemInner> f1();
}

static class ListLazyElemOuterImpl implements ListLazyElemOuter {
private final List<ListLazyElemInner> f1;

ListLazyElemOuterImpl(final List<ListLazyElemInner> f1) {
this.f1 = f1;
}

@Override
public List<ListLazyElemInner> f1() {
return f1;
}
}

@Test
public void testListElementsLazy() {
final ListLazyElemOuter bean1 =
new ListLazyElemOuterImpl(
Arrays.asList(
new ListLazyElemInner(0),
new ListLazyElemInner(1),
new ListLazyElemInner(42),
null,
new ListLazyElemInner(4)));
final RowEncoder<ListLazyElemOuter> encoder = Encoders.bean(ListLazyElemOuter.class);
final BinaryRow row = encoder.toRow(bean1);
ListLazyElemInner.check = true;
final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
row.pointTo(buffer, 0, buffer.size());
final ListLazyElemOuter deserializedBean = encoder.fromRow(row);
Assert.assertEquals(deserializedBean.f1().get(2).f1(), 42);
Assert.assertEquals(deserializedBean.f1().get(3), null);
}
}
Loading