 package org.apache.doris.iceberg;
 
 import org.apache.doris.common.jni.vec.ColumnValue;
-
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Streams;
-import org.apache.iceberg.MetadataTableType;
-import org.apache.iceberg.MetadataTableUtils;
-import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.Table;
-import org.apache.iceberg.TableScan;
-import org.apache.iceberg.io.CloseableIterator;
 
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
 
 class IcebergSnapshotsJniScanner extends IcebergMetadataJniScanner {
 
-    private CloseableIterator<StructLike> reader;
-    private Map<String, Integer> columnNameToPosition = new HashMap<>();
+    private static final Map<String, String> SNAPSHOTS_SCHEMA = new HashMap<>();
+    static {
+        SNAPSHOTS_SCHEMA.put("committed_at", "datetime");
+        SNAPSHOTS_SCHEMA.put("snapshot_id", "long");
+        SNAPSHOTS_SCHEMA.put("parent_id", "long");
+        SNAPSHOTS_SCHEMA.put("operation", "string");
+        SNAPSHOTS_SCHEMA.put("manifest_list", "string");
+        SNAPSHOTS_SCHEMA.put("summary", "string");
+    }
+
+    private Iterator<Snapshot> reader;
 
     public IcebergSnapshotsJniScanner(int batchSize, Map<String, String> params) {
         super(batchSize, params);
     }
 
     @Override
     protected void loadTable(Table table) throws IOException {
-        TableScan tableScan = MetadataTableUtils.createMetadataTableInstance(table, MetadataTableType.SNAPSHOTS)
-                .newScan();
-        this.columnNameToPosition = Streams.mapWithIndex(tableScan.schema().columns().stream(),
-                (column, position) -> Maps.immutableEntry(column.name(), Long.valueOf(position).intValue()))
-                .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
-        for (String requiredField : requiredFields) {
-            if (!columnNameToPosition.containsKey(requiredField)) {
-                throw new IOException("Invalid required field: " + requiredField);
-            }
-        }
-        this.reader = tableScan.planFiles().iterator().next().asDataTask().rows().iterator();
+        reader = table.snapshots().iterator();
     }
 
     @Override
@@ -64,10 +56,10 @@ protected int getNext() throws IOException {
         }
         int rows = 0;
         while (reader.hasNext() && rows < getBatchSize()) {
-            StructLike dataRow = reader.next();
+            Snapshot snapshot = reader.next();
             for (int i = 0; i < requiredFields.length; i++) {
                 String columnName = requiredFields[i];
-                Object value = getValue(columnName, dataRow);
+                Object value = getValue(columnName, snapshot);
                 if (value == null) {
                     appendData(i, null);
                 } else {
@@ -82,39 +74,34 @@ protected int getNext() throws IOException {
 
     @Override
     public void close() throws IOException {
+        // TODO: move this to base class
         if (reader != null) {
-            reader.close();
+            reader = null; // Clear the iterator to release resources
         }
     }
 
     @Override
-    protected HashMap<String, String> getMetadataSchema() {
-        HashMap<String, String> metadataSchema = new HashMap<>();
-        metadataSchema.put("committed_at", "long");
-        metadataSchema.put("snapshot_id", "long");
-        metadataSchema.put("parent_id", "long");
-        metadataSchema.put("operation", "string");
-        metadataSchema.put("manifest_list", "string");
-        metadataSchema.put("summary", "string");
-        return metadataSchema;
+    protected Map<String, String> getMetadataSchema() {
+        return SNAPSHOTS_SCHEMA;
     }
 
-    private Object getValue(String columnName, StructLike dataRow) {
+    private Object getValue(String columnName, Snapshot snapshot) {
         switch (columnName) {
             case "committed_at":
-                return dataRow.get(columnNameToPosition.get(columnName), Long.class) / 1000;
+                return snapshot.timestampMillis();
             case "snapshot_id":
-                return dataRow.get(columnNameToPosition.get(columnName), Long.class);
+                return snapshot.snapshotId();
            case "parent_id":
-                return dataRow.get(columnNameToPosition.get(columnName), Long.class);
+                return snapshot.parentId();
             case "operation":
-                return dataRow.get(columnNameToPosition.get(columnName), String.class);
+                return snapshot.operation();
             case "manifest_list":
-                return dataRow.get(columnNameToPosition.get(columnName), String.class);
+                return snapshot.manifestListLocation();
             case "summary":
-                return dataRow.get(columnNameToPosition.get(columnName), Map.class);
+                return snapshot.summary();
             default:
-                throw new IllegalArgumentException("Unrecognized column name " + columnName);
+                throw new IllegalArgumentException(
+                        "Unrecognized column name " + columnName + " in Iceberg snapshot metadata table");
         }
     }
 }
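For context, a minimal standalone sketch of the Iceberg Snapshot accessors this scanner now reads directly (Table.snapshots(), timestampMillis(), snapshotId(), parentId(), operation(), manifestListLocation(), summary()). It is not part of this change; the HadoopTables catalog and the table location are illustrative assumptions, and any catalog that yields a Table behaves the same way.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;

public class SnapshotFieldsSketch {
    public static void main(String[] args) {
        // Hypothetical table location; replace with a real Iceberg table path.
        Table table = new HadoopTables(new Configuration()).load("hdfs:///warehouse/db/tbl");
        // Walk the snapshot log the same way loadTable() obtains its iterator.
        for (Snapshot snapshot : table.snapshots()) {
            System.out.println(snapshot.snapshotId()
                    + " committed_at=" + snapshot.timestampMillis()
                    + " parent_id=" + snapshot.parentId()
                    + " operation=" + snapshot.operation()
                    + " manifest_list=" + snapshot.manifestListLocation()
                    + " summary=" + snapshot.summary());
        }
    }
}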