24
24
import org .apache .iceberg .ManifestFiles ;
25
25
import org .apache .iceberg .PartitionData ;
26
26
import org .apache .iceberg .PartitionField ;
27
+ import org .apache .iceberg .Schema ;
28
+ import org .apache .iceberg .Snapshot ;
27
29
import org .apache .iceberg .Table ;
28
30
import org .apache .iceberg .data .GenericRecord ;
29
31
import org .apache .iceberg .types .Type ;
41
43
42
44
/**
 * JNI scanner that materializes rows of the Iceberg "partitions" metadata table
 * by reading manifest files directly. Extends the shared metadata-scanner base
 * class; the manifest to read is described by a serializable bean (declared
 * below, outside this view).
 */
class IcebergPartitionsJniScanner extends IcebergMetadataJniScanner {
    // Metadata table name, used in diagnostics (see getColumnValue's warning path).
    private static final String NAME = "partitions";
    // Projection pushed down to the manifest reader in initReader(): only the
    // manifest columns this scanner actually consumes are loaded.
    private static final List<String> SCAN_COLUMNS = List.of("content", "partition", "file_size_in_bytes",
            "record_count");

    // Partition fields across all specs of the table; populated in initReader()
    // via getAllPartitionFields(table).
    private List<PartitionField> partitionFields;
    // Timestamp (millis) of the manifest's snapshot, or null if the snapshot
    // no longer exists; backs the "last_updated_at" column.
    private Long lastUpdateTime;
    // Snapshot id of the manifest's snapshot, or null if expired; backs the
    // "last_updated_snapshot_id" column.
    private Long lastUpdateSnapshotId;
    // Partition spec id of the manifest being read (manifestBean.partitionSpecId()).
    private Integer specId;
    // Table schema snapshot, used by getResultType() to resolve partition
    // transform result types.
    private Schema schema;
    // Record reused across rows to avoid per-row allocation; its struct type is
    // built from the partition fields (see getResultType()).
    private GenericRecord reusedRecord;

    // A serializable bean that contains a bare minimum to read a manifest
@@ -58,16 +64,18 @@ public IcebergPartitionsJniScanner(int batchSize, Map<String, String> params) {
58
64
    /**
     * Initializes per-manifest state and opens the manifest reader.
     *
     * Resolves the manifest's snapshot (tolerating an expired/missing snapshot by
     * leaving the last-update fields null), caches the table schema and partition
     * fields, pre-creates the reused output record, and opens either a data- or
     * delete-manifest reader depending on the manifest's content type. The reader
     * is restricted to SCAN_COLUMNS and made case-insensitive.
     *
     * @throws IOException if the manifest cannot be opened
     */
    @Override
    protected void initReader() throws IOException {
        this.specId = manifestBean.partitionSpecId();
        // Snapshot may have been expired since the manifest list was planned;
        // guard with a single lookup instead of two.
        Snapshot snapshot = table.snapshot(manifestBean.snapshotId());
        this.lastUpdateTime = snapshot != null ? snapshot.timestampMillis() : null;
        this.lastUpdateSnapshotId = snapshot != null ? snapshot.snapshotId() : null;

        this.partitionFields = getAllPartitionFields(table);
        this.schema = table.schema();
        // Struct type covers every partition field; see getResultType().
        this.reusedRecord = GenericRecord.create(getResultType());

        if (manifestBean.content() == ManifestContent.DATA) {
            reader = ManifestFiles.read(manifestBean, table.io(), table.specs()).select(SCAN_COLUMNS)
                    .caseSensitive(false).iterator();
        } else {
            // Non-DATA manifests hold delete files (position/equality deletes).
            reader = ManifestFiles.readDeleteManifest(manifestBean, table.io(), table.specs()).select(SCAN_COLUMNS)
                    .caseSensitive(false).iterator();
        }
    }
73
81
@@ -83,25 +91,39 @@ protected Object getColumnValue(String columnName, Object row) {
83
91
case "record_count" :
84
92
return content == FileContent .DATA ? file .recordCount () : 0 ;
85
93
case "file_count" :
86
- return content == FileContent .DATA ? 1L : 0L ;
94
+ return content == FileContent .DATA ? 1 : 0 ;
87
95
case "total_data_file_size_in_bytes" :
88
96
return content == FileContent .DATA ? file .fileSizeInBytes () : 0 ;
89
97
case "position_delete_record_count" :
90
98
return content == FileContent .POSITION_DELETES ? file .recordCount () : 0 ;
91
99
case "position_delete_file_count" :
92
- return content == FileContent .POSITION_DELETES ? 1L : 0L ;
100
+ return content == FileContent .POSITION_DELETES ? 1 : 0 ;
93
101
case "equality_delete_record_count" :
94
102
return content == FileContent .EQUALITY_DELETES ? file .recordCount () : 0 ;
95
103
case "equality_delete_file_count" :
96
- return content == FileContent .EQUALITY_DELETES ? 1L : 0L ;
104
+ return content == FileContent .EQUALITY_DELETES ? 1 : 0 ;
97
105
case "last_updated_at" :
98
106
return lastUpdateTime ;
107
+ case "last_updated_snapshot_id" :
108
+ return lastUpdateSnapshotId ;
99
109
default :
100
- throw new IllegalArgumentException (
101
- "Unrecognized column name " + columnName + " in Iceberg " + NAME + " metadata table" ) ;
110
+ LOG . warn ( "Unrecognized column name " + columnName + " in Iceberg " + NAME + " metadata table" );
111
+ return null ;
102
112
}
103
113
}
104
114
115
    /**
     * Builds the struct type describing one output row's partition columns:
     * one optional field per partition field, whose type is the partition
     * transform's result type over the source column's type.
     *
     * NOTE(review): assumes {@code partitionFields} and {@code schema} were
     * already populated by initReader() — confirm call order if reused elsewhere.
     *
     * @return a StructType with one optional NestedField per partition field
     */
    private Types.StructType getResultType() {
        List<Types.NestedField> fields = new ArrayList<>();
        for (PartitionField partitionField : partitionFields) {
            int id = partitionField.fieldId();
            String name = partitionField.name();
            // Result type of the transform applied to the source column's type,
            // e.g. bucket(int) -> int, day(timestamp) -> date.
            Type type = partitionField.transform().getResultType(schema.findType(partitionField.sourceId()));
            // Optional: a given row's manifest may predate this partition field.
            Types.NestedField nestedField = Types.NestedField.optional(id, name, type);
            fields.add(nestedField);
        }
        return Types.StructType.of(fields);
    }
126
+
105
127
private Object getPartitionValues (PartitionData partitionData ) {
106
128
List <Types .NestedField > fileFields = partitionData .getPartitionType ().fields ();
107
129
Map <Integer , Integer > fieldIdToPos = new HashMap <>();
0 commit comments