Skip to content

Commit 590f11c

Browse files
committed
Decode HTML entities in article titles
1 parent 055879b commit 590f11c

File tree

3 files changed

+24
-5
lines changed

3 files changed

+24
-5
lines changed

pnpm-lock.yaml

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
"dependencies": {
1515
"@paralleldrive/cuid2": "^2.2.2",
1616
"cheerio": "1.0.0-rc.12",
17+
"entities": "^5.0.0",
1718
"fast-xml-parser": "^4.4.0",
1819
"hyper-express": "^6.16.1",
1920
"kysely": "^0.27.3",

server/src/feed.ts

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import { NewArticle } from "./schemas/public/Article.js";
44
import { createHash } from "node:crypto";
55
import * as cheerio from "cheerio";
66
import { z } from "zod";
7+
import * as entities from "entities";
78

89
type DownloadedArticle = Omit<NewArticle, "id" | "feed_id">;
910

@@ -52,6 +53,7 @@ const RssChannel = z.object({
5253
z.object({
5354
title: z.optional(TextNode),
5455
description: z.optional(TextNode),
56+
"content:encoded": z.optional(TextNode),
5557
link: z.optional(TextNode),
5658
guid: z.optional(
5759
TextNode.extend({
@@ -166,15 +168,15 @@ export async function downloadFeed(feedUrl: string): Promise<DownloadedFeed> {
166168
const articles: DownloadedArticle[] = parsedFeed.items.map((item) => ({
167169
article_id: item.id,
168170
content: getContent(item) ?? "",
169-
title: item.title,
171+
title: entities.decode(item.title),
170172
link: item.link,
171173
published: item.updated,
172174
}));
173175

174176
console.debug(`Processed feed ${feedUrl}`);
175177

176178
return {
177-
title: parsedFeed.title,
179+
title: entities.decode(parsedFeed.title),
178180
link: parsedFeed.link,
179181
icon,
180182
articles,
@@ -242,7 +244,10 @@ function parseFeed(xml: string): ParsedFeed {
242244
id:
243245
item["$$rdf:about"] ??
244246
item.link?.$text ??
245-
hashStrings(item.title?.$text ?? "", item.description?.$text ?? ""),
247+
hashStrings(
248+
item.title?.$text ?? "",
249+
item.description?.$text ?? "",
250+
),
246251
title: item.title?.$text ?? "",
247252
link: item.link?.$text ?? "",
248253
updated: new Date(item["dc:date"].$text),
@@ -257,11 +262,15 @@ function parseFeed(xml: string): ParsedFeed {
257262
id:
258263
item.guid?.$text ??
259264
item.link?.$text ??
260-
hashStrings(item.title?.$text ?? "", item.description?.$text ?? ""),
265+
hashStrings(
266+
item.title?.$text ?? "",
267+
item.description?.$text ?? "",
268+
),
261269
title: item.title?.$text ?? "",
262270
link: item.link?.$text ?? "",
263271
updated: item.pubDate ? new Date(item.pubDate.$text) : new Date(),
264-
content: item.description?.$text ?? "",
272+
content:
273+
item["content:encoded"]?.$text ?? item.description?.$text ?? "",
265274
}));
266275
}
267276

0 commit comments

Comments
 (0)