Skip to content

Commit cae8ba9

Browse files
committed
Both uploading and chat work
1 parent fe68558 commit cae8ba9

File tree

8 files changed

+5887
-2167
lines changed

8 files changed

+5887
-2167
lines changed

app/api/upload/route.ts

+96-50
Original file line numberDiff line numberDiff line change
@@ -34,71 +34,117 @@ export async function POST(req: Request) {
3434
}
3535

3636
console.log('JSON parsed successfully, items:', jsonData.length);
37-
const results = [];
37+
interface ProcessedItem {
38+
id: string;
39+
source: string;
40+
chapter: string;
41+
}
42+
const results: ProcessedItem[] = [];
3843
const totalItems = jsonData.length;
3944

40-
for (let i = 0; i < jsonData.length; i++) {
41-
const item = jsonData[i];
42-
if (!item.source || !item.chapter || !item.text) {
43-
console.log('Skipping invalid item:', item);
44-
continue;
45-
}
45+
// Create a TransformStream for streaming responses
46+
const stream = new TransformStream();
47+
const writer = stream.writable.getWriter();
48+
const encoder = new TextEncoder();
4649

47-
console.log('Processing item', i + 1, 'of', totalItems);
48-
50+
// Start processing in the background
51+
(async () => {
4952
try {
50-
// Insert into resources table
51-
console.log('Inserting into resources table...');
52-
const [resource] = await db
53-
.insert(resources)
54-
.values({
55-
content: item.text,
56-
source: item.source,
57-
chapter: item.chapter,
58-
})
59-
.returning();
53+
// Process items in batches to show progress
54+
const batchSize = 5;
55+
for (let i = 0; i < jsonData.length; i += batchSize) {
56+
const batch = jsonData.slice(i, i + batchSize);
57+
58+
for (const item of batch) {
59+
if (!item.source || !item.chapter || !item.text) {
60+
console.log('Skipping invalid item:', item);
61+
continue;
62+
}
6063

61-
console.log('Resource inserted:', resource.id);
64+
try {
65+
// Insert into resources table
66+
const [resource] = await db
67+
.insert(resources)
68+
.values({
69+
content: item.text,
70+
source: item.source,
71+
chapter: item.chapter,
72+
})
73+
.returning();
6274

63-
// Generate embeddings
64-
console.log('Generating embeddings...');
65-
const embeddingResults = await generateEmbeddings(item.text);
66-
console.log('Embeddings generated:', embeddingResults.length);
67-
68-
// Insert embeddings
69-
console.log('Inserting embeddings...');
70-
for (const embeddingResult of embeddingResults) {
71-
await db.insert(embeddings).values({
72-
resourceId: resource.id,
73-
content: embeddingResult.content,
74-
embedding: embeddingResult.embedding,
75-
});
76-
}
75+
// Generate embeddings
76+
const embeddingResults = await generateEmbeddings(item.text);
77+
78+
// Insert embeddings
79+
for (const embeddingResult of embeddingResults) {
80+
await db.insert(embeddings).values({
81+
resourceId: resource.id,
82+
content: embeddingResult.content,
83+
embedding: embeddingResult.embedding,
84+
});
85+
}
86+
87+
results.push({
88+
id: resource.id,
89+
source: item.source,
90+
chapter: item.chapter,
91+
});
7792

78-
results.push({
79-
id: resource.id,
80-
source: item.source,
81-
chapter: item.chapter,
82-
});
93+
} catch (dbError) {
94+
console.error('Database error processing item:', dbError);
95+
continue;
96+
}
97+
}
8398

84-
} catch (dbError) {
85-
console.error('Database error processing item:', dbError);
86-
// Continue with next item even if this one fails
87-
continue;
99+
// Send progress update after each batch
100+
const processed = Math.min(i + batchSize, jsonData.length);
101+
console.log(`Processed ${processed} of ${totalItems} items`);
102+
103+
await writer.write(encoder.encode(
104+
`data: ${JSON.stringify({
105+
type: 'progress',
106+
processed,
107+
total: totalItems,
108+
})}\n\n`
109+
));
110+
}
111+
112+
// Send final completion message
113+
await writer.write(encoder.encode(
114+
`data: ${JSON.stringify({
115+
type: 'complete',
116+
success: true,
117+
processed: results.length,
118+
total: totalItems,
119+
results,
120+
})}\n\n`
121+
));
122+
} catch (error) {
123+
console.error('Error during processing:', error);
124+
await writer.write(encoder.encode(
125+
`data: ${JSON.stringify({
126+
type: 'error',
127+
error: 'Error processing file',
128+
details: error instanceof Error ? error.message : 'Unknown error',
129+
})}\n\n`
130+
));
131+
} finally {
132+
await writer.close();
88133
}
89-
}
134+
})();
90135

91-
console.log('Upload process completed');
92-
return NextResponse.json({
93-
success: true,
94-
processed: results.length,
95-
total: totalItems,
96-
results
136+
return new Response(stream.readable, {
137+
headers: {
138+
'Content-Type': 'text/event-stream',
139+
'Cache-Control': 'no-cache',
140+
'Connection': 'keep-alive',
141+
},
97142
});
98143

99144
} catch (error) {
100145
console.error('Error processing file:', error);
101146
return NextResponse.json({
147+
type: 'error',
102148
error: 'Error processing file',
103149
details: error instanceof Error ? error.message : 'Unknown error'
104150
}, { status: 500 });

components/FileUpload.tsx

+43-16
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@ export function FileUpload() {
1212
const [processedItems, setProcessedItems] = useState(0);
1313
const abortControllerRef = useRef<AbortController | null>(null);
1414

15+
const resetState = () => {
16+
setIsUploading(false);
17+
setProgress(0);
18+
setTotalItems(0);
19+
setProcessedItems(0);
20+
abortControllerRef.current = null;
21+
};
22+
1523
const handleFileUpload = async (event: React.ChangeEvent<HTMLInputElement>) => {
1624
const file = event.target.files?.[0];
1725
if (!file) return;
@@ -20,6 +28,7 @@ export function FileUpload() {
2028
setIsUploading(true);
2129
setProgress(0);
2230
setProcessedItems(0);
31+
setTotalItems(0);
2332
abortControllerRef.current = new AbortController();
2433

2534
const formData = new FormData();
@@ -38,30 +47,48 @@ export function FileUpload() {
3847
throw new Error('Upload failed');
3948
}
4049

41-
const data = await response.json();
42-
console.log('Upload completed:', data);
43-
44-
if (data.success) {
45-
setProcessedItems(data.processed);
46-
setTotalItems(data.total);
47-
setProgress(100);
48-
toast.success(`Successfully processed ${data.processed} items`);
49-
} else {
50-
toast.error(data.error || 'Error uploading file');
50+
const reader = response.body?.getReader();
51+
if (!reader) {
52+
throw new Error('No reader available');
5153
}
5254

55+
while (true) {
56+
const { done, value } = await reader.read();
57+
if (done) break;
58+
59+
const text = new TextDecoder().decode(value);
60+
const lines = text.split('\n');
61+
62+
for (const line of lines) {
63+
if (line.startsWith('data: ')) {
64+
const data = JSON.parse(line.slice(6));
65+
console.log('Received data:', data);
66+
67+
if (data.type === 'progress') {
68+
setProcessedItems(data.processed);
69+
setTotalItems(data.total);
70+
setProgress((data.processed / data.total) * 100);
71+
} else if (data.type === 'complete') {
72+
setProcessedItems(data.processed);
73+
setTotalItems(data.total);
74+
setProgress(100);
75+
toast.success(`Successfully processed ${data.processed} items`);
76+
setTimeout(resetState, 2000);
77+
} else if (data.type === 'error') {
78+
toast.error(data.error || 'Error uploading file');
79+
resetState();
80+
}
81+
}
82+
}
83+
}
5384
} catch (error) {
5485
console.error('Upload error:', error);
5586
if (error instanceof Error && error.name === 'AbortError') {
5687
toast.info('Upload cancelled');
5788
} else {
5889
toast.error('Error uploading file');
5990
}
60-
} finally {
61-
setIsUploading(false);
62-
setProgress(0);
63-
setProcessedItems(0);
64-
abortControllerRef.current = null;
91+
resetState();
6592
}
6693
};
6794

@@ -98,7 +125,7 @@ export function FileUpload() {
98125
</Button>
99126
)}
100127
</div>
101-
{isUploading && (
128+
{(isUploading || progress > 0) && (
102129
<div className="w-full max-w-md space-y-2">
103130
<Progress value={progress} className="w-full" />
104131
<p className="text-sm text-center text-gray-500">

install.sh

+4
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,8 @@ pnpm db:push
4141
echo "🤖 Installing AI dependencies..."
4242
pnpm add ai @ai-sdk/react @ai-sdk/openai @radix-ui/react-progress
4343

44+
# Install test dependencies
45+
echo "🧪 Installing test dependencies..."
46+
pnpm add -D @types/jest jest ts-jest @jest/globals babel-jest @babel/core @babel/preset-env @babel/plugin-transform-runtime crypto-browserify
47+
4448
echo "✨ Installation complete! You can now start the development server with 'pnpm dev'"

jest.config.js

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/** @type {import('ts-jest').JestConfigWithTsJest} */
2+
module.exports = {
3+
preset: 'ts-jest',
4+
testEnvironment: 'node',
5+
moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'mjs'],
6+
transform: {
7+
'^.+\\.tsx?$': ['ts-jest', { useESM: true }],
8+
'^.+\\.m?js$': 'babel-jest'
9+
},
10+
moduleNameMapper: {
11+
'^@/(.*)$': '<rootDir>/$1',
12+
'^nanoid$': '<rootDir>/node_modules/nanoid/nanoid.js',
13+
'^node:crypto$': '<rootDir>/node_modules/crypto-browserify/index.js'
14+
},
15+
transformIgnorePatterns: [
16+
'node_modules/(?!(nanoid|clsx|tailwind-merge|@ai-sdk)/)'
17+
],
18+
extensionsToTreatAsEsm: ['.ts'],
19+
};

lib/ai/embedding.test.ts

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { describe, expect, test, jest } from '@jest/globals';
2+
import { generateChunks } from './embedding';
3+
4+
// Mock the dependencies
5+
jest.mock('nanoid', () => ({
6+
customAlphabet: () => () => 'test-id'
7+
}));
8+
9+
jest.mock('clsx', () => ({
10+
clsx: () => ''
11+
}));
12+
13+
jest.mock('tailwind-merge', () => ({
14+
twMerge: () => ''
15+
}));
16+
17+
jest.mock('@/lib/db', () => ({}));
18+
jest.mock('@/lib/env.mjs', () => ({}));
19+
jest.mock('ai', () => ({}));
20+
21+
describe('Text Chunking', () => {
22+
test('should split Chinese text into sentences correctly', () => {
23+
const input = '高祖,沛丰邑中阳里人,姓刘氏,字季。父曰太公,母曰刘媪。其先刘媪尝息大泽之陂,梦与神遇。是时雷电晦冥,太公往视,则见蛟龙於其上。已而有身,遂产高祖。';
24+
const expected = [
25+
'高祖,沛丰邑中阳里人,姓刘氏,字季',
26+
'父曰太公,母曰刘媪',
27+
'其先刘媪尝息大泽之陂,梦与神遇',
28+
'是时雷电晦冥,太公往视,则见蛟龙於其上',
29+
'已而有身,遂产高祖'
30+
];
31+
32+
const result = generateChunks(input);
33+
expect(result).toEqual(expected);
34+
});
35+
36+
test('should handle mixed Chinese and English punctuation', () => {
37+
const input = '这是第一句。This is the second sentence! 这是第三句?';
38+
const expected = [
39+
'这是第一句',
40+
'This is the second sentence',
41+
'这是第三句'
42+
];
43+
44+
const result = generateChunks(input);
45+
expect(result).toEqual(expected);
46+
});
47+
48+
test('should handle empty input', () => {
49+
const input = '';
50+
const expected: string[] = [];
51+
52+
const result = generateChunks(input);
53+
expect(result).toEqual(expected);
54+
});
55+
56+
test('should handle input with only whitespace', () => {
57+
const input = ' \n \t ';
58+
const expected: string[] = [];
59+
60+
const result = generateChunks(input);
61+
expect(result).toEqual(expected);
62+
});
63+
64+
test('should trim whitespace from chunks', () => {
65+
const input = ' 第一句。 第二句 。 第三句 ';
66+
const expected = ['第一句', '第二句', '第三句'];
67+
68+
const result = generateChunks(input);
69+
expect(result).toEqual(expected);
70+
});
71+
});

lib/ai/embedding.ts

+5-2
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@ const embeddingModel = openai.embedding('text-embedding-ada-002');
99
/**
 * Splits raw text into sentence-sized chunks for embedding.
 *
 * The input is cut at sentence terminators: ASCII `.`, `!`, `?` and their
 * CJK counterparts (ideographic full stop U+3002, full-width exclamation
 * mark U+FF01, full-width question mark U+FF1F). The terminators themselves
 * are dropped, every piece is trimmed, and pieces that are empty or
 * whitespace-only are discarded.
 *
 * @param input - Text to split; may be empty or whitespace-only.
 * @returns The trimmed, non-empty sentences in their original order.
 */
const generateChunks = (input: string): string[] => {
  return input
    // Splitting on `!` and `?` alone never breaks at a full stop, so text
    // punctuated only with periods would come back as one oversized chunk.
    // Escapes are used for the CJK marks so the pattern survives any
    // width-normalising tooling.
    .split(/[.!?\u3002\uFF01\uFF1F]/)
    .map(piece => piece.trim())
    .filter(piece => piece.length > 0);
};
1516

17+
export { generateChunks };
18+
1619
export const generateEmbeddings = async (
1720
value: string,
1821
): Promise<Array<{ embedding: number[]; content: string }>> => {

0 commit comments

Comments
 (0)