Skip to content

Commit 9c5836e

Browse files
authored
#191: XLSX driver auto-detects header row (#284)
* xlsx driver now detects header row.
1 parent 69bdab2 commit 9c5836e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+832
-496
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ Breaking changes are annotated with ☢️.
2929
The renaming behavior is controlled by a new option `ingest.column.rename`
3030
([docs](https://sq.io/docs/config/#ingestcolumnrename)).
3131

32+
- [#191]: The [XLSX](https://sq.io/docs/drivers/xlsx) driver now detects header rows, like
33+
the CSV driver already does.
3234

3335
## [v0.40.0] - 2023-07-03
3436

cli/cli_test.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func TestSmoke(t *testing.T) {
8181
}
8282

8383
func TestCreateTblTestBytes(t *testing.T) {
84-
th, src, _, _ := testh.NewWith(t, sakila.Pg)
84+
th, src, _, _, _ := testh.NewWith(t, sakila.Pg)
8585
th.DiffDB(src)
8686

8787
tblDef := sqlmodel.NewTableDef(
@@ -103,7 +103,7 @@ func TestCreateTblTestBytes(t *testing.T) {
103103
func TestOutputRaw(t *testing.T) {
104104
t.Parallel()
105105

106-
for _, handle := range sakila.SQLAll() {
106+
for _, handle := range sakila.SQLLatest() {
107107
handle := handle
108108

109109
t.Run(handle, func(t *testing.T) {
@@ -121,7 +121,7 @@ func TestOutputRaw(t *testing.T) {
121121
[]kind.Kind{kind.Text, kind.Bytes},
122122
)
123123

124-
th, src, _, _ := testh.NewWith(t, handle)
124+
th, src, _, _, _ := testh.NewWith(t, handle)
125125

126126
// Create the table and insert data
127127
insertRow := []any{fixt.GopherFilename, wantBytes}
@@ -186,7 +186,7 @@ func TestExprNoSource(t *testing.T) {
186186
tr := testrun.New(context.Background(), t, nil).Hush()
187187
err := tr.Exec("--csv", "--no-header", tc.in)
188188
require.NoError(t, err)
189-
results := tr.MustReadCSV()
189+
results := tr.BindCSV()
190190
require.Len(t, results, 1)
191191
require.Equal(t, tc.want, results[0])
192192
})

cli/cmd_inspect.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package cli
22

33
import (
4+
"database/sql"
5+
46
"github.com/neilotoole/sq/cli/flag"
57
"github.com/neilotoole/sq/cli/run"
68
"github.com/spf13/cobra"
@@ -166,9 +168,13 @@ func execInspect(cmd *cobra.Command, args []string) error {
166168
}
167169

168170
if cmdFlagIsSetTrue(cmd, flag.InspectDBProps) {
169-
sqlDrvr := dbase.SQLDriver()
171+
var db *sql.DB
172+
if db, err = dbase.DB(ctx); err != nil {
173+
return err
174+
}
170175
var props map[string]any
171-
if props, err = sqlDrvr.DBProperties(ctx, dbase.DB()); err != nil {
176+
sqlDrvr := dbase.SQLDriver()
177+
if props, err = sqlDrvr.DBProperties(ctx, db); err != nil {
172178
return err
173179
}
174180

cli/cmd_inspect_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ import (
3333
// TestCmdInspect_json_yaml tests "sq inspect" for
3434
// the JSON and YAML formats.
3535
func TestCmdInspect_json_yaml(t *testing.T) {
36+
tutil.SkipShort(t, true)
37+
3638
testCases := []struct {
3739
handle string
3840
wantTbls []string
@@ -61,8 +63,6 @@ func TestCmdInspect_json_yaml(t *testing.T) {
6163
tc := tc
6264

6365
t.Run(tc.handle, func(t *testing.T) {
64-
t.Parallel()
65-
6666
tutil.SkipWindowsIf(t, tc.handle == sakila.XLSX, "XLSX too slow on windows workflow")
6767

6868
th := testh.New(t)

cli/cmd_slq_test.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ func TestCmdSLQ_CSV(t *testing.T) {
102102
err := tr.Exec("slq", "--header=false", "--csv", fmt.Sprintf("%s.data", src.Handle))
103103
require.NoError(t, err)
104104

105-
recs := tr.MustReadCSV()
105+
recs := tr.BindCSV()
106106
require.Equal(t, sakila.TblActorCount, len(recs))
107107
}
108108

@@ -133,7 +133,7 @@ func TestCmdSLQ_OutputFlag(t *testing.T) {
133133

134134
func TestCmdSLQ_Join_cross_source(t *testing.T) {
135135
const queryTpl = `%s.customer | join(%s.address, .address_id) | where(.customer_id == %d) | .[0] | .customer_id, .email, .city_id` //nolint:lll
136-
handles := sakila.SQLAll()
136+
handles := sakila.SQLLatest()
137137

138138
// Attempt to join every SQL test source against every SQL test source.
139139
for _, h1 := range handles {
@@ -159,7 +159,7 @@ func TestCmdSLQ_Join_cross_source(t *testing.T) {
159159
err := tr.Exec("slq", "--header=false", "--csv", query)
160160
require.NoError(t, err)
161161

162-
recs := tr.MustReadCSV()
162+
recs := tr.BindCSV()
163163
require.Equal(t, 1, len(recs), "should only be one matching record")
164164
require.Equal(t, 3, len(recs[0]), "should have three fields")
165165
require.Equal(t, strconv.Itoa(sakila.MillerCustID), recs[0][0])
@@ -183,15 +183,15 @@ func TestCmdSLQ_ActiveSrcHandle(t *testing.T) {
183183
require.Equal(t, src.Handle, tr.Run.Config.Collection.Active().Handle)
184184
err := tr.Exec("slq", "--header=false", "--csv", "@sakila_sl3.actor")
185185
require.NoError(t, err)
186-
recs := tr.MustReadCSV()
186+
recs := tr.BindCSV()
187187
require.Equal(t, sakila.TblActorCount, len(recs))
188188

189189
// 2. Verify that it works using source.ActiveHandle as the src handle
190190
tr = testrun.New(th.Context, t, nil).Add(*src).Hush()
191191
require.Equal(t, src.Handle, tr.Run.Config.Collection.Active().Handle)
192192
err = tr.Exec("slq", "--header=false", "--csv", source.ActiveHandle+".actor")
193193
require.NoError(t, err)
194-
recs = tr.MustReadCSV()
194+
recs = tr.BindCSV()
195195
require.Equal(t, sakila.TblActorCount, len(recs))
196196
}
197197

cli/cmd_sql_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ func TestCmdSQL_SelectFromUserDriver(t *testing.T) {
107107

108108
err := tr.Exec("sql", "--csv", "--header=false", "SELECT * FROM "+wantTbl.tblName)
109109
require.NoError(t, err)
110-
recs := tr.MustReadCSV()
110+
recs := tr.BindCSV()
111111
require.Equal(t, wantTbl.wantRows, len(recs),
112112
"expected %d rows in tbl {%s} but got %d", wantTbl.wantRows,
113113
wantTbl, len(recs))
@@ -185,7 +185,7 @@ func TestCmdSQL_StdinQuery(t *testing.T) {
185185
}
186186

187187
require.NoError(t, err)
188-
results := tr.MustReadCSV()
188+
results := tr.BindCSV()
189189
require.Equal(t, tc.wantCount, len(results))
190190
})
191191
}

cli/cmd_tbl.go

+20-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cli
22

33
import (
4+
"database/sql"
45
"fmt"
56

67
"github.com/neilotoole/sq/cli/run"
@@ -64,7 +65,8 @@ func newTblCopyCmd() *cobra.Command {
6465
}
6566

6667
func execTblCopy(cmd *cobra.Command, args []string) error {
67-
ru := run.FromContext(cmd.Context())
68+
ctx := cmd.Context()
69+
ru := run.FromContext(ctx)
6870
if len(args) == 0 || len(args) > 2 {
6971
return errz.New("one or two table args required")
7072
}
@@ -121,12 +123,17 @@ func execTblCopy(cmd *cobra.Command, args []string) error {
121123
}
122124

123125
var dbase driver.Database
124-
dbase, err = ru.Databases.Open(cmd.Context(), tblHandles[0].src)
126+
dbase, err = ru.Databases.Open(ctx, tblHandles[0].src)
127+
if err != nil {
128+
return err
129+
}
130+
131+
db, err := dbase.DB(ctx)
125132
if err != nil {
126133
return err
127134
}
128135

129-
copied, err := sqlDrvr.CopyTable(cmd.Context(), dbase.DB(), tblHandles[0].tbl, tblHandles[1].tbl, copyData)
136+
copied, err := sqlDrvr.CopyTable(ctx, db, tblHandles[0].tbl, tblHandles[1].tbl, copyData)
130137
if err != nil {
131138
return errz.Wrapf(err, "failed tbl copy %s.%s --> %s.%s",
132139
tblHandles[0].handle, tblHandles[0].tbl,
@@ -227,7 +234,8 @@ only applies to SQL sources.`,
227234
}
228235

229236
func execTblDrop(cmd *cobra.Command, args []string) (err error) {
230-
ru := run.FromContext(cmd.Context())
237+
ctx := cmd.Context()
238+
ru := run.FromContext(ctx)
231239
var tblHandles []tblHandle
232240
tblHandles, err = parseTableHandleArgs(ru.DriverRegistry, ru.Config.Collection, args)
233241
if err != nil {
@@ -245,12 +253,16 @@ func execTblDrop(cmd *cobra.Command, args []string) (err error) {
245253
}
246254

247255
var dbase driver.Database
248-
dbase, err = ru.Databases.Open(cmd.Context(), tblH.src)
249-
if err != nil {
256+
if dbase, err = ru.Databases.Open(ctx, tblH.src); err != nil {
250257
return err
251258
}
252-
err = sqlDrvr.DropTable(cmd.Context(), dbase.DB(), tblH.tbl, false)
253-
if err != nil {
259+
260+
var db *sql.DB
261+
if db, err = dbase.DB(ctx); err != nil {
262+
return err
263+
}
264+
265+
if err = sqlDrvr.DropTable(cmd.Context(), db, tblH.tbl, false); err != nil {
254266
return err
255267
}
256268

cli/output/yamlw/yamlw_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ func TestRecordWriter(t *testing.T) {
2323
first_name: NICK
2424
`
2525

26-
th, src, _, _ := testh.NewWith(t, sakila.SL3)
26+
th, src, _, _, _ := testh.NewWith(t, sakila.SL3)
2727
query := src.Handle + ".actor | .last_update, .actor_id, .last_name, .first_name | .[0:2]"
2828

2929
sink, err := th.QuerySLQ(query, nil)

cli/testrun/testrun.go

+12-3
Original file line numberDiff line numberDiff line change
@@ -197,17 +197,26 @@ func (tr *TestRun) Bind(v any) *TestRun {
197197
return tr
198198
}
199199

200-
// BindMap is a convenience method for binding tr.Out to a map.
200+
// BindMap is a convenience method for binding tr.Out to a map
201+
// (assuming tr.Out is JSON).
201202
func (tr *TestRun) BindMap() map[string]any {
202203
m := map[string]any{}
203204
tr.Bind(&m)
204205
return m
205206
}
206207

207-
// MustReadCSV reads CSV from tr.Out and returns all records,
208+
// BindSliceMap is a convenience method for binding tr.Out
209+
// to a slice of maps (assuming tr.Out is JSON).
210+
func (tr *TestRun) BindSliceMap() []map[string]any {
211+
var a []map[string]any
212+
tr.Bind(&a)
213+
return a
214+
}
215+
216+
// BindCSV reads CSV from tr.Out and returns all records,
208217
// failing the test on any problem. Obviously the Exec call
209218
// should have specified "--csv".
210-
func (tr *TestRun) MustReadCSV() [][]string {
219+
func (tr *TestRun) BindCSV() [][]string {
211220
tr.mu.Lock()
212221
defer tr.mu.Unlock()
213222

drivers/csv/csv.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ type database struct {
123123
}
124124

125125
// DB implements driver.Database.
126-
func (d *database) DB() *sql.DB {
127-
return d.impl.DB()
126+
func (d *database) DB(ctx context.Context) (*sql.DB, error) {
127+
return d.impl.DB(ctx)
128128
}
129129

130130
// SQLDriver implements driver.Database.

drivers/csv/csv_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ func TestIngestDuplicateColumns(t *testing.T) {
108108
tr = testrun.New(ctx, t, tr).Hush()
109109
require.NoError(t, tr.Exec("--csv", ".data"))
110110
wantHeaders := []string{"actor_id", "first_name", "last_name", "last_update", "actor_id_1"}
111-
data := tr.MustReadCSV()
111+
data := tr.BindCSV()
112112
require.Equal(t, wantHeaders, data[0])
113113

114114
// Make sure the data is correct
@@ -129,6 +129,6 @@ func TestIngestDuplicateColumns(t *testing.T) {
129129
tr = testrun.New(ctx, t, tr)
130130
require.NoError(t, tr.Exec("--csv", ".data"))
131131
wantHeaders = []string{"x_actor_id", "x_first_name", "x_last_name", "x_last_update", "x_actor_id_1"}
132-
data = tr.MustReadCSV()
132+
data = tr.BindCSV()
133133
require.Equal(t, wantHeaders, data[0])
134134
}

drivers/csv/detect_header.go

+1-33
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package csv
22

33
import (
44
"context"
5-
"strings"
65

76
"github.com/neilotoole/sq/libsq/driver"
87

@@ -58,37 +57,6 @@ func detectHeaderRow(recs [][]string) (hasHeader bool, err error) {
5857
return false, nil
5958
}
6059

61-
// Hash generates a hash from the kinds returned by
62-
// the detectors. The detectors should already have
63-
// sampled data.
64-
//
65-
// TODO: move Hash to pkg libsq/core/kind?
66-
func Hash(detectors []*kind.Detector) (h string, err error) {
67-
if len(detectors) == 0 {
68-
return "", errz.New("no kind detectors")
69-
}
70-
71-
kinds := make([]kind.Kind, len(detectors))
72-
for i := range detectors {
73-
kinds[i], _, err = detectors[i].Detect()
74-
if err != nil {
75-
return "", err
76-
}
77-
}
78-
79-
// TODO: use an actual hash function
80-
hash := strings.Builder{}
81-
for i := range kinds {
82-
if i > 0 {
83-
hash.WriteRune('|')
84-
}
85-
hash.WriteString(kinds[i].String())
86-
}
87-
88-
h = hash.String()
89-
return h, nil
90-
}
91-
9260
func calcKindHash(recs [][]string) (string, error) {
9361
if len(recs) == 0 || len(recs[0]) == 0 {
9462
return "", errz.New("no records")
@@ -107,5 +75,5 @@ func calcKindHash(recs [][]string) (string, error) {
10775
}
10876
}
10977

110-
return Hash(detectors)
78+
return kind.Hash(detectors)
11179
}

drivers/csv/ingest.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,12 @@ func ingestCSV(ctx context.Context, src *source.Source, openFn source.FileOpenFu
109109
// And now we need to create the dest table in scratchDB
110110
tblDef := createTblDef(source.MonotableName, header, kinds)
111111

112-
err = scratchDB.SQLDriver().CreateTable(ctx, scratchDB.DB(), tblDef)
112+
db, err := scratchDB.DB(ctx)
113+
if err != nil {
114+
return err
115+
}
116+
117+
err = scratchDB.SQLDriver().CreateTable(ctx, db, tblDef)
113118
if err != nil {
114119
return errz.Wrap(err, "csv: failed to create dest scratch table")
115120
}

drivers/csv/insert.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,12 @@ func createTblDef(tblName string, colNames []string, kinds []kind.Kind) *sqlmode
123123

124124
// getRecMeta returns record.Meta to use with RecordWriter.Open.
125125
func getRecMeta(ctx context.Context, scratchDB driver.Database, tblDef *sqlmodel.TableDef) (record.Meta, error) {
126-
colTypes, err := scratchDB.SQLDriver().TableColumnTypes(ctx, scratchDB.DB(), tblDef.Name, tblDef.ColNames())
126+
db, err := scratchDB.DB(ctx)
127+
if err != nil {
128+
return nil, err
129+
}
130+
131+
colTypes, err := scratchDB.SQLDriver().TableColumnTypes(ctx, db, tblDef.Name, tblDef.ColNames())
127132
if err != nil {
128133
return nil, err
129134
}

drivers/json/import.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,12 @@ var (
5858

5959
// getRecMeta returns record.Meta to use with RecordWriter.Open.
6060
func getRecMeta(ctx context.Context, scratchDB driver.Database, tblDef *sqlmodel.TableDef) (record.Meta, error) {
61-
colTypes, err := scratchDB.SQLDriver().TableColumnTypes(ctx, scratchDB.DB(), tblDef.Name, tblDef.ColNames())
61+
db, err := scratchDB.DB(ctx)
62+
if err != nil {
63+
return nil, err
64+
}
65+
66+
colTypes, err := scratchDB.SQLDriver().TableColumnTypes(ctx, db, tblDef.Name, tblDef.ColNames())
6267
if err != nil {
6368
return nil, err
6469
}

0 commit comments

Comments
 (0)