Skip to content

Snow-1936378 add support for vector type for loader #2161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d5a4587
Add vector column search
sfc-gh-ext-simba-jf Apr 21, 2025
44b33ab
update insert
sfc-gh-ext-simba-jf Apr 21, 2025
7ca75fb
testing
sfc-gh-ext-simba-jf Apr 21, 2025
9d9e4c8
fix check-style
sfc-gh-ext-simba-jf Apr 21, 2025
30076e8
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Apr 25, 2025
374751c
Fix parsing
sfc-gh-ext-simba-jf Apr 28, 2025
75a65b6
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Apr 28, 2025
ff62bc0
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Apr 29, 2025
be0c4b0
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf May 6, 2025
2126ae4
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf May 7, 2025
a60567f
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf May 9, 2025
8dfda07
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf May 14, 2025
c32b59a
Hide tests to find error
sfc-gh-ext-simba-jf May 15, 2025
920de5a
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Jun 10, 2025
3726af7
Fix null pointer
sfc-gh-ext-simba-jf Jun 10, 2025
c2bdd21
code review changes
sfc-gh-ext-simba-jf Jun 10, 2025
a82121c
Quick fix
sfc-gh-ext-simba-jf Jun 10, 2025
1239a7b
Allow tests to run
sfc-gh-ext-simba-jf Jun 11, 2025
bc1e910
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Jun 13, 2025
05373b4
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Jun 17, 2025
1c1694e
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Jun 23, 2025
cbf77d4
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Jun 25, 2025
b0165ee
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-pbulawa Jul 2, 2025
a6c3d0d
Merge branch 'master' into SNOW-1936378-Add-support-for-vector-type-f…
sfc-gh-ext-simba-jf Jul 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,9 @@ public void run() {
+ "("
+ _loader.getColumnsAsString()
+ ")"
+ " SELECT * FROM \""
+ " SELECT "
+ _loader.getStageColumnsAsString()
+ " FROM \""
+ stage.getId()
+ "\"";
break;
Expand Down
50 changes: 50 additions & 0 deletions src/main/java/net/snowflake/client/loader/StreamLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
Expand Down Expand Up @@ -83,6 +85,8 @@ public class StreamLoader implements Loader, Runnable {

private List<String> _columns;

private Map<String, Integer> _vectorColumns = new HashMap<String, Integer>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should rename this map to better communicate its purpose: it stores the size of each vector column.


private List<String> _keys;

private long _batchRowSize = DEFAULT_BATCH_ROW_SIZE;
Expand Down Expand Up @@ -178,6 +182,7 @@ public void setProperty(LoaderProperty property, Object value) {
typeCheckedColumns.add((String) e);
}
_columns = typeCheckedColumns;
setVectorColumns();
}
break;
case keys:
Expand Down Expand Up @@ -598,6 +603,23 @@ private void truncateTargetTable() {
}
}

public void setVectorColumns() {
try {
DatabaseMetaData dbmd = _processConn.getMetaData();
for (String col : _columns) {
try (ResultSet rs = metadata.getColumns(_database, _schema, _table, col)) {
// Check if column type is VECTOR, if true, add column name and size to vector column map.
if (rs.getString(6).equalsIgnoreCase("vector")) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we extract the logic of this condition and name this check instead of a comment?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to store metadata for all columns, not only for the vector, in my opinion.

_vectorColumns.put(col, rs.getInt(7));
}
}
}
} catch (SQLException e) {
logger.error(e.getMessage(), e);
abort(new Loader.ConnectionError(Utils.getCause(e)));
}
}

@Override
public void run() {
try {
Expand Down Expand Up @@ -750,6 +772,10 @@ List<String> getColumns() {
return this._columns;
}

/**
 * Exposes the vector-column map.
 *
 * @return the loader's own map (not a copy) from vector column name to the column size recorded
 *     for it
 */
Map<String, Integer> getVectorColumns() {
  return _vectorColumns;
}

String getColumnsAsString() {
// comma separate list of column names
StringBuilder sb = new StringBuilder("\"");
Expand Down Expand Up @@ -904,4 +930,28 @@ public int getSubmittedRowCount() {
/**
 * Stores the given flag in the loader's test-mode field.
 *
 * @param mode new value of the test-mode flag
 */
void setTestMode(boolean mode) {
  _testMode = mode;
}

public String getStageColumnsAsString() {
// if there are no vector columns in the target table just select * is needed from the staging
// table.
if (_vectorColumns.isEmpty()) {
return "*";
}

StringBuilder sb = new StringBuilder();
for (int i = 0; i < _columns.size(); i++) {
String colName = _columns.get(i);
if (_vectorColumns.containsKey(colName)) {
sb.append(colName + "::VECTOR(FLOAT, " + _vectorColumns.get(colName) + ")");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about INT types in a vector?

} else {
sb.append("\"");
sb.append(colName);
sb.append("\"");
}
if (i != _columns.size() - 1) {
sb.append(", ");
}
}
return sb.toString();
}
}
63 changes: 63 additions & 0 deletions src/test/java/net/snowflake/client/loader/LoaderLatestIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import net.snowflake.client.annotations.DontRunOnGithubActions;
import net.snowflake.client.category.TestTags;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -201,4 +202,66 @@ public void testKeyClusteringTable() throws Exception {
}
}
}

/**
 * Loads one row into a table whose only column is {@code VECTOR(FLOAT, 3)} and checks that the
 * listener reports exactly one submitted row.
 *
 * <p>The method is public like the other tests in this class; JUnit Jupiter does not run private
 * test methods.
 */
@Test
@DontRunOnGithubActions
public void testVectorColumnInTable() throws Exception {
  String tableName = "VECTOR_TABLE";
  // try-with-resources closes the statement used for both the setup and the cleanup DDL.
  try (java.sql.Statement statement = testConnection.createStatement()) {
    try {
      statement.execute(
          String.format("CREATE OR REPLACE TABLE %s (vector_col VECTOR(FLOAT, 3))", tableName));

      TestDataConfigBuilder tdcb = new TestDataConfigBuilder(testConnection, putConnection);
      // The DDL leaves the column name unquoted, so it is stored upper-cased. The loader appears
      // to double-quote the names it is given, so the name is passed in upper case to match.
      tdcb.setOperation(Operation.INSERT)
          .setStartTransaction(true)
          .setTruncateTable(true)
          .setTableName(tableName)
          .setColumns(Arrays.asList("VECTOR_COL"));
      StreamLoader loader = tdcb.getStreamLoader();
      TestDataConfigBuilder.ResultListener listener = tdcb.getListener();
      loader.start();

      loader.submitRow(new Object[] {"[12, 14.0, 100]"});
      loader.finish();
      int submitted = listener.getSubmittedRowCount();
      assertThat("submitted rows", submitted, equalTo(1));
    } finally {
      statement.execute(String.format("DROP TABLE IF EXISTS %s", tableName));
    }
  }
}

@Test
@DontRunOnGithubActions
private void testMultipleVectorColumnsInTable() throws Exception {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also test the table with mixed types (the best with all types).

String tableName = "VECTOR_TABLE";
try {
testConnection
.createStatement()
.execute(
String.format(
"CREATE OR REPLACE TABLE %s (vec1 VECTOR(FLOAT, 3), vec2 VECTOR(FLOAT, 3))",
tableName));

TestDataConfigBuilder tdcb = new TestDataConfigBuilder(testConnection, putConnection);
tdcb.setOperation(Operation.INSERT)
.setStartTransaction(true)
.setTruncateTable(true)
.setTableName(tableName)
.setColumns(Arrays.asList("vector_col"));
StreamLoader loader = tdcb.getStreamLoader();
TestDataConfigBuilder.ResultListener listener = tdcb.getListener();
loader.start();

loader.submitRow(new Object[] {"[12, 14.0, 100]", "[12, 14.0, 100]"});
loader.finish();
int submitted = listener.getSubmittedRowCount();
assertThat("submitted rows", submitted, equalTo(1));

} finally {
testConnection.createStatement().execute(String.format("DROP TABLE IF EXISTS %s", tableName));
}
}
}
Loading