-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcsv-loader.ts
128 lines (119 loc) · 3.88 KB
/
csv-loader.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import { createReadStream, existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { relative } from "pathe";
import type { Loader, LoaderContext } from "astro/loaders";
import Papa from "papaparse";
import { finished } from "node:stream/promises";
import { AstroError } from "astro/errors";
/** Options accepted by `csvLoader`. */
export interface CSVLoaderOptions {
/** Path to the CSV file, resolved relative to the Astro project root (`config.root`). */
fileName: string;
/**
* Defines how the header values are transformed into field names.
* By default the values are camelized. Pass `false` to leave them unchanged,
* or pass a function that receives each header value and its column index
* and returns the field name to use.
*/
transformHeader?: false | ((header: string, index: number) => string);
/**
* The field to use as the entry ID. Values in the column must be unique.
* If the header is transformed, this is the field name _after_ transformation.
* Defaults to the first column. Rows with no value in this field are skipped
* with a warning.
*/
idField?: string;
/**
* Options passed to the CSV parser. `header` and `transformHeader` are set by
* the loader itself, which is why they are omitted from this type.
*/
parserOptions?: Omit<
Papa.ParseConfig,
"header" | "dynamicTyping" | "transformHeader" | "step" | "complete"
>;
}
/**
 * Converts a header value to camelCase, treating runs of `-`, `_` and
 * whitespace as word separators (`"first name"`, `"first-name"` and
 * `"first_name"` all become `"firstName"`).
 *
 * Only the first character of each word is re-cased; the remaining characters
 * are kept as written, so an already camel-cased header is left intact.
 *
 * @param str - The raw header value.
 * @returns The camelized field name, or an empty string if `str` contains no words.
 */
const camelize = (str: string): string =>
  str
    .split(/[-_\s]+/)
    // A leading separator (e.g. the space in `"a, b"` style headers) yields an
    // empty first segment. Drop empties so the first *real* word is the one
    // that gets lower-cased instead of being capitalised as a "second" word.
    .filter((word) => word.length > 0)
    .map(
      (word, index) =>
        `${index === 0 ? word.charAt(0).toLowerCase() : word.charAt(0).toUpperCase()}${word.slice(1)}`,
    )
    .join("");
/**
 * Creates an Astro content loader that reads entries from a CSV file.
 *
 * The first line of the file is used as the header. Every following row
 * becomes one entry whose fields are keyed by the (optionally transformed)
 * header values. The entry ID is taken from `idField`, or from the first
 * column when `idField` is not given; rows with an empty ID are skipped with
 * a warning.
 *
 * When a file watcher is available, the store is rebuilt whenever the file
 * changes.
 *
 * @param options - See {@link CSVLoaderOptions}.
 * @returns A loader named `csv-loader`.
 */
export function csvLoader({
  fileName,
  idField,
  transformHeader = camelize,
  parserOptions,
}: CSVLoaderOptions): Loader {
  /**
   * Clears the store and repopulates it from the CSV file at `filePath`.
   *
   * @throws AstroError if the file cannot be read or parsed, if no ID column
   * can be determined, or if `parseData` rejects an entry.
   */
  async function syncData(
    filePath: string,
    { logger, parseData, store, config }: LoaderContext,
  ): Promise<void> {
    const relativePath = relative(fileURLToPath(config.root), filePath);
    const csvStream = Papa.parse(Papa.NODE_STREAM_INPUT, {
      dynamicTyping: true,
      ...parserOptions,
      // These two are spread last on purpose so `parserOptions` cannot override them.
      header: true,
      transformHeader: transformHeader === false ? undefined : transformHeader,
    });
    const fileStream = createReadStream(filePath);
    fileStream.pipe(csvStream);

    try {
      await new Promise<void>((resolve, reject) => {
        // Resolved per sync rather than written back to the `idField` option,
        // so a reload after the header row changed picks up the new first column.
        let resolvedIdField = idField;
        let count = 0;
        let row = 0;
        let failed = false;
        // One promise per accepted row; the load is only complete once all of
        // them have been parsed and stored.
        const pending: Promise<void>[] = [];

        // Stops reading and rejects the load exactly once.
        const fail = (error: unknown) => {
          if (failed) {
            return;
          }
          failed = true;
          fileStream.unpipe(csvStream);
          fileStream.destroy();
          if (!csvStream.writableEnded) {
            csvStream.end();
          }
          reject(error);
        };

        store.clear();

        // `pipe` does not forward source errors, so without this handler a
        // read failure would leave the promise pending forever.
        fileStream.on("error", fail);
        csvStream.on("error", fail);

        csvStream.on("end", () => {
          // Rows are parsed asynchronously; wait for the stragglers before
          // reporting success. Each pending promise already routes its own
          // failure through `fail`, so this never rejects.
          void Promise.all(pending).then(() => {
            if (failed) {
              return;
            }
            logger.info(`Loaded ${count} entries from ${fileName}`);
            resolve();
          });
        });

        csvStream.on("data", (data: Record<string, unknown>) => {
          if (failed) {
            return;
          }
          if (!resolvedIdField) {
            resolvedIdField = Object.keys(data)[0];
            if (!resolvedIdField) {
              fail(new Error("No ID field found in CSV file"));
              return;
            }
            logger.info(
              `No ID field specified, using first column: ${resolvedIdField}`,
            );
          }
          const key = resolvedIdField;
          row++;
          const rawId = data[key];
          // With `dynamicTyping` a cell containing `0` arrives as the number 0,
          // which is a legitimate ID; every other falsy value means "no ID".
          const hasId = Boolean(rawId) || rawId === 0;
          if (!hasId) {
            logger.warn(`No ID (${key}) found in row ${row}. Skipping.`);
            return;
          }
          count++;
          const id = String(rawId);
          pending.push(
            parseData({ id, data, filePath: relativePath })
              .then((parsedData) => {
                store.set({ id, data: parsedData, filePath: relativePath });
              })
              .catch(fail),
          );
        });
      });
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error("Error reading data: " + reason);
      throw new AstroError("Aborted loading CSV data");
    }
  }

  return {
    name: "csv-loader",
    load: async (options) => {
      const { config, logger, watcher } = options;
      logger.info(`Loading CSV data from ${fileName}`);
      const url = new URL(fileName, config.root);
      if (!existsSync(url)) {
        logger.error(`File not found: ${fileName}`);
        return;
      }
      const filePath = fileURLToPath(url);
      await syncData(filePath, options);
      watcher?.on("change", async (changedPath) => {
        if (changedPath !== filePath) {
          return;
        }
        logger.info(`Reloading data from ${fileName}`);
        try {
          await syncData(filePath, options);
        } catch {
          // `syncData` has already logged the cause. Nothing awaits an event
          // listener, so rethrowing here would only surface as an unhandled
          // rejection.
          logger.error(`Failed to reload data from ${fileName}`);
        }
      });
    },
  };
}