Skip to content

Commit 650d7b8

Browse files
authored
Merge pull request #3851 from omnivore-app/main
Web Prod Deployment
2 parents 22cb6ca + b6e3321 commit 650d7b8

File tree

12 files changed

+269
-213
lines changed

12 files changed

+269
-213
lines changed

packages/api/src/apollo.ts

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,24 @@
44
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
55
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
66
/* eslint-disable @typescript-eslint/require-await */
7+
import { createPrometheusExporterPlugin } from '@bmatei/apollo-prometheus-exporter'
78
import { makeExecutableSchema } from '@graphql-tools/schema'
89
import * as Sentry from '@sentry/node'
9-
import { ContextFunction, PluginDefinition } from 'apollo-server-core'
10-
import { Express } from 'express'
10+
import {
11+
ApolloServerPluginDrainHttpServer,
12+
ContextFunction,
13+
PluginDefinition,
14+
} from 'apollo-server-core'
1115
import { ApolloServer } from 'apollo-server-express'
1216
import { ExpressContext } from 'apollo-server-express/dist/ApolloServer'
17+
import { ApolloServerPlugin } from 'apollo-server-plugin-base'
18+
import { Express } from 'express'
1319
import * as httpContext from 'express-http-context2'
20+
import type http from 'http'
1421
import * as jwt from 'jsonwebtoken'
1522
import { EntityManager } from 'typeorm'
1623
import { promisify } from 'util'
24+
import { ReadingProgressDataSource } from './datasources/reading_progress_data_source'
1725
import { appDataSource } from './data_source'
1826
import { sanitizeDirectiveTransformer } from './directives'
1927
import { env } from './env'
@@ -22,17 +30,14 @@ import { functionResolvers } from './resolvers/function_resolvers'
2230
import { ClaimsToSet, RequestContext, ResolverContext } from './resolvers/types'
2331
import ScalarResolvers from './scalars'
2432
import typeDefs from './schema'
25-
import { tracer } from './tracing'
26-
import { getClaimsByToken, setAuthInCookie } from './utils/auth'
27-
import { SetClaimsRole } from './utils/dictionary'
28-
import { logger } from './utils/logger'
29-
import { ReadingProgressDataSource } from './datasources/reading_progress_data_source'
30-
import { createPrometheusExporterPlugin } from '@bmatei/apollo-prometheus-exporter'
31-
import { ApolloServerPlugin } from 'apollo-server-plugin-base'
3233
import {
3334
countDailyServiceUsage,
3435
createServiceUsage,
3536
} from './services/service_usage'
37+
import { tracer } from './tracing'
38+
import { getClaimsByToken, setAuthInCookie } from './utils/auth'
39+
import { SetClaimsRole } from './utils/dictionary'
40+
import { logger } from './utils/logger'
3641

3742
const signToken = promisify(jwt.sign)
3843
const pubsub = createPubSubClient()
@@ -100,7 +105,10 @@ const contextFunc: ContextFunction<ExpressContext, ResolverContext> = async ({
100105
return ctx
101106
}
102107

103-
export function makeApolloServer(app: Express): ApolloServer {
108+
export function makeApolloServer(
109+
app: Express,
110+
httpServer: http.Server
111+
): ApolloServer {
104112
let schema = makeExecutableSchema({
105113
resolvers,
106114
typeDefs,
@@ -169,7 +177,14 @@ export function makeApolloServer(app: Express): ApolloServer {
169177
const apollo = new ApolloServer({
170178
schema: schema,
171179
context: contextFunc,
172-
plugins: [promExporter, usageLimitPlugin],
180+
plugins: [
181+
// Our httpServer handles incoming requests to our Express app.
182+
// Below, we tell Apollo Server to "drain" this httpServer,
183+
// enabling our servers to shut down gracefully.
184+
ApolloServerPluginDrainHttpServer({ httpServer }),
185+
promExporter,
186+
usageLimitPlugin,
187+
],
173188
formatError: (err) => {
174189
logger.info('server error', err)
175190
Sentry.captureException(err)

packages/api/src/jobs/ai/create_digest.ts

Lines changed: 134 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
1-
import { logger } from '../../utils/logger'
2-
import { v4 as uuid } from 'uuid'
3-
4-
import { OpenAI } from '@langchain/openai'
1+
import { JsonOutputParser } from '@langchain/core/output_parsers'
52
import { PromptTemplate } from '@langchain/core/prompts'
6-
import { LibraryItem } from '../../entity/library_item'
3+
import { OpenAI } from '@langchain/openai'
74
import {
85
htmlToSpeechFile,
96
SpeechFile,
107
SSMLOptions,
118
} from '@omnivore/text-to-speech-handler'
129
import axios from 'axios'
10+
import showdown from 'showdown'
11+
import yaml from 'yaml'
12+
import { LibraryItem } from '../../entity/library_item'
13+
import { TaskState } from '../../generated/graphql'
14+
import { redisDataSource } from '../../redis_data_source'
15+
import { Digest, writeDigest } from '../../services/digest'
1316
import {
1417
findLibraryItemsByIds,
1518
searchLibraryItems,
1619
} from '../../services/library_item'
17-
import { redisDataSource } from '../../redis_data_source'
20+
import { findDeviceTokensByUserId } from '../../services/user_device_tokens'
21+
import { logger } from '../../utils/logger'
1822
import { htmlToMarkdown } from '../../utils/parser'
19-
import yaml from 'yaml'
20-
import { JsonOutputParser } from '@langchain/core/output_parsers'
21-
import showdown from 'showdown'
22-
import { Digest, writeDigest } from '../../services/digest'
23-
import { TaskState } from '../../generated/graphql'
23+
import { sendMulticastPushNotifications } from '../../utils/sendNotification'
2424

2525
export type CreateDigestJobSchedule = 'daily' | 'weekly'
2626

@@ -73,9 +73,18 @@ interface RankedTitle {
7373
}
7474

7575
export const CREATE_DIGEST_JOB = 'create-digest'
76+
// Cron expressions (minute hour day-of-month month day-of-week) used to
// schedule the recurring digest job, keyed by schedule name.
export const CRON_PATTERNS = {
  // every day at 10:30 UTC
  daily: '30 10 * * *',
  // every Sunday at 10:30 UTC
  // NOTE(review): day-of-week 7 for Sunday is an extension that not every cron
  // parser accepts (0 is the portable spelling) — confirm the scheduler in use
  // supports it before changing, since the pattern may be part of the job key.
  weekly: '30 10 * * 7',
}
7682

7783
let digestDefinition: DigestDefinition
7884

85+
// Returns the cron expression registered in CRON_PATTERNS for the given
// digest schedule.
export const getCronPattern = (schedule: CreateDigestJobSchedule): string =>
  CRON_PATTERNS[schedule]
87+
7988
const fetchDigestDefinition = async (): Promise<DigestDefinition> => {
8089
const promptFileUrl = process.env.PROMPT_FILE_URL
8190
if (!promptFileUrl) {
@@ -131,7 +140,7 @@ const getPreferencesList = async (userId: string): Promise<LibraryItem[]> => {
131140
// Makes multiple DB queries and combines the results
132141
const getCandidatesList = async (
133142
userId: string,
134-
libraryItemIds?: string[]
143+
selectedLibraryItemIds?: string[]
135144
): Promise<LibraryItem[]> => {
136145
// use the queries from the digest definitions to lookup preferences
137146
// There should be a list of multiple queries we use. For now we can
@@ -140,18 +149,25 @@ const getCandidatesList = async (
140149
// count: 100
141150
// reason: "most recent 100 items saved over 500 words"
142151

143-
if (libraryItemIds) {
144-
logger.info('Using libraryItemIds')
145-
return findLibraryItemsByIds(libraryItemIds, userId)
152+
if (selectedLibraryItemIds) {
153+
return findLibraryItemsByIds(selectedLibraryItemIds, userId)
146154
}
147155

156+
// get the existing candidate ids from cache
157+
const key = `digest:${userId}:existingCandidateIds`
158+
const existingCandidateIds = await redisDataSource.redisClient?.get(key)
159+
160+
logger.info('existingCandidateIds: ', { existingCandidateIds })
161+
148162
const candidates = await Promise.all(
149163
digestDefinition.candidateSelectors.map(async (selector) => {
150164
// use the selector to fetch items
151165
const results = await searchLibraryItems(
152166
{
153167
includeContent: true,
154-
query: selector.query,
168+
query: existingCandidateIds
169+
? `(${selector.query}) -includes:${existingCandidateIds}` // exclude the existing candidates
170+
: selector.query,
155171
size: selector.count,
156172
},
157173
userId
@@ -172,6 +188,23 @@ const getCandidatesList = async (
172188
readableContent: htmlToMarkdown(item.readableContent),
173189
})) // convert the html content to markdown
174190

191+
if (dedupedCandidates.length === 0) {
192+
logger.info('No new candidates found')
193+
194+
if (existingCandidateIds) {
195+
// reuse the existing candidates
196+
const existingIds = existingCandidateIds.split(',')
197+
return findLibraryItemsByIds(existingIds, userId)
198+
}
199+
200+
// return empty array if no existing candidates
201+
return []
202+
}
203+
204+
// store the ids in cache
205+
const candidateIds = dedupedCandidates.map((item) => item.id).join(',')
206+
await redisDataSource.redisClient?.set(key, candidateIds)
207+
175208
return dedupedCandidates
176209
}
177210

@@ -203,7 +236,7 @@ const createUserProfile = async (
203236
// it to redis
204237
const findOrCreateUserProfile = async (userId: string): Promise<string> => {
205238
// check redis for user profile, return if found
206-
const key = `userProfile:${userId}`
239+
const key = `digest:${userId}:userProfile`
207240
const existingProfile = await redisDataSource.redisClient?.get(key)
208241
if (existingProfile) {
209242
return existingProfile
@@ -266,6 +299,9 @@ const rankCandidates = async (
266299
return rankedItems
267300
}
268301

302+
// Drops unusable topic entries (null, undefined or empty string) while
// preserving the ranking order of the remaining topics.
// The input type admits null/undefined because the original check used
// optional chaining (`topic?.length`), i.e. such entries can occur at runtime.
const filterTopics = (
  rankedTopics: ReadonlyArray<string | null | undefined>
): string[] =>
  rankedTopics.filter(
    (topic): topic is string => typeof topic === 'string' && topic.length > 0
  )
304+
269305
// Does some grouping by topic while trying to maintain ranking
270306
// adds some basic topic diversity
271307
const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
@@ -289,7 +325,6 @@ const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
289325
}
290326

291327
logger.info('rankedTopics: ', rankedTopics)
292-
logger.info('finalSelections: ', selected)
293328

294329
const finalSelections = []
295330

@@ -298,9 +333,15 @@ const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
298333
finalSelections.push(...matches)
299334
}
300335

301-
logger.info('finalSelections: ', finalSelections)
336+
logger.info(
337+
'finalSelections: ',
338+
finalSelections.map((item) => item.libraryItem.title)
339+
)
302340

303-
return { finalSelections, rankedTopics }
341+
return {
342+
finalSelections,
343+
rankedTopics: filterTopics(rankedTopics),
344+
}
304345
}
305346

306347
const summarizeItems = async (
@@ -363,7 +404,9 @@ const generateSpeechFiles = (
363404
// we should have a QA step here that does some
364405
// basic checks to make sure the summaries are good.
365406
// Keeps only the items whose summary passes two basic sanity checks:
//   1. the summary is not blank, and
//   2. the summary is shorter than the item's readable content (a "summary"
//      that is as long as the source text did not summarize anything).
// The relative order of the surviving items is preserved.
const filterSummaries = (summaries: RankedItem[]): RankedItem[] => {
  return summaries.filter((item) => {
    // without this check an empty summary would pass the length comparison
    const hasText = item.summary.trim().length > 0

    return (
      hasText &&
      item.summary.length < item.libraryItem.readableContent.length
    )
  })
}
368411

369412
// we can use something more sophisticated to generate titles
@@ -376,11 +419,8 @@ const generateDescription = (
376419
summaries: RankedItem[],
377420
rankedTopics: string[]
378421
): string =>
379-
`We selected ${
380-
summaries.length
381-
} articles from your last 24 hours of saved items, covering ${rankedTopics.join(
382-
', '
383-
)}.`
422+
`We selected ${summaries.length} articles from your last 24 hours of saved items` +
423+
(rankedTopics.length ? `, covering ${rankedTopics.join(', ')}.` : '.')
384424

385425
// generate content based on the summaries
386426
const generateContent = (summaries: RankedItem[]): string =>
@@ -395,45 +435,77 @@ const generateByline = (summaries: RankedItem[]): string =>
395435
.join(', ')
396436

397437
// Sends a push notification telling the user that a new digest is available.
// Best effort: a lookup or delivery failure is logged and swallowed so that it
// can never turn an already-written digest into a failed job.
const notifyDigestReady = async (userId: string): Promise<void> => {
  try {
    const tokens = await findDeviceTokensByUserId(userId)
    if (tokens.length === 0) {
      return
    }

    const message = {
      notification: {
        title: 'Digest ready',
        body: 'Your digest is ready to listen',
      },
      tokens: tokens.map((token) => token.token),
    }

    await sendMulticastPushNotifications(userId, message, 'reminder')
  } catch (error) {
    logger.error('failed to send digest notification', error)
  }
}

// Builds a digest for the user in `jobData` and stores it with writeDigest.
//
// Steps: fetch the digest definition, collect candidate library items, rank
// them against the user profile, summarize the selected items, drop summaries
// that fail filterSummaries, generate the speech files and write the digest.
//
// Outcomes written for `jobData.id`:
//   - Succeeded with title 'No articles found' when there are no candidates
//     or no summary survives filtering,
//   - Succeeded with the full digest otherwise (followed by a push
//     notification to the user's devices),
//   - Failed when any step throws; the error is logged, not rethrown.
export const createDigestJob = async (jobData: CreateDigestJobData) => {
  try {
    digestDefinition = await fetchDigestDefinition()

    const candidates = await getCandidatesList(
      jobData.userId,
      jobData.libraryItemIds
    )
    if (candidates.length === 0) {
      logger.info('No candidates found')
      // `return await` (not a bare `return`) so that a rejected write is
      // handled by the catch block below instead of escaping the job
      return await writeDigest(jobData.userId, {
        id: jobData.id,
        jobState: TaskState.Succeeded,
        title: 'No articles found',
      })
    }

    const userProfile = await findOrCreateUserProfile(jobData.userId)
    const rankedCandidates = await rankCandidates(candidates, userProfile)
    const { finalSelections, rankedTopics } =
      chooseRankedSelections(rankedCandidates)

    const summaries = await summarizeItems(finalSelections)

    const filteredSummaries = filterSummaries(summaries)
    if (filteredSummaries.length === 0) {
      logger.info('No summaries found')
      return await writeDigest(jobData.userId, {
        id: jobData.id,
        jobState: TaskState.Succeeded,
        title: 'No articles found',
      })
    }

    // speechFiles is generated from filteredSummaries, so speechFiles[index]
    // below lines up with the chapter built from the same index
    const speechFiles = generateSpeechFiles(filteredSummaries, {
      ...jobData,
      primaryVoice: jobData.voices?.[0],
      secondaryVoice: jobData.voices?.[1],
    })

    // every user-visible field is derived from filteredSummaries so the
    // title, text, article count and byline describe exactly the chapters
    // (and audio) that are part of the digest
    const title = generateTitle(filteredSummaries)
    const digest: Digest = {
      id: jobData.id,
      title,
      content: generateContent(filteredSummaries),
      jobState: TaskState.Succeeded,
      speechFiles,
      chapters: filteredSummaries.map((item, index) => ({
        title: item.libraryItem.title,
        id: item.libraryItem.id,
        url: item.libraryItem.originalUrl,
        thumbnail: item.libraryItem.thumbnail ?? undefined,
        wordCount: speechFiles[index].wordCount,
      })),
      createdAt: new Date(),
      description: generateDescription(filteredSummaries, rankedTopics),
      byline: generateByline(filteredSummaries),
      urlsToAudio: [],
    }

    await writeDigest(jobData.userId, digest)

    // only announce a digest that actually exists; notifyDigestReady never
    // throws, so it cannot route a stored digest into the catch block
    await notifyDigestReady(jobData.userId)
  } catch (error) {
    logger.error('createDigestJob error', error)

    await writeDigest(jobData.userId, {
      id: jobData.id,
      jobState: TaskState.Failed,
    })
  }
}

packages/api/src/resolvers/article/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ export const searchResolver = authorized<
681681
from: Number(startCursor),
682682
size: first + 1, // fetch one more item to get next cursor
683683
includePending: true,
684-
includeContent: !!params.includeContent,
684+
includeContent: params.includeContent ?? true, // by default include content for offline use for now
685685
includeDeleted: params.query?.includes('in:trash'),
686686
query: params.query,
687687
useFolders: params.query?.includes('use:folders'),
@@ -786,6 +786,7 @@ export const updatesSinceResolver = authorized<
786786
size: size + 1, // fetch one more item to get next cursor
787787
includeDeleted: true,
788788
query,
789+
includeContent: true, // by default include content for offline use for now
789790
},
790791
uid
791792
)

0 commit comments

Comments
 (0)