Skip to content

Commit 9627957

Browse files
feat: Add ability to detect patches which are present in a file (#2633)
* feat: Add ability to detect patches which are present in a file
* chore: export patchDetector function
* fix: Make sure we don't attempt to call toJson on binary content
---------
Co-authored-by: Christopher Fox <[email protected]>
1 parent f98f852 commit 9627957

File tree

5 files changed

+262
-3
lines changed

5 files changed

+262
-3
lines changed

src/patcher/from-docx.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import { appendRelationship, getNextRelationshipIndex } from "./relationship-man
1717
import { appendContentType } from "./content-types-manager";
1818

1919
// eslint-disable-next-line functional/prefer-readonly-type
20-
type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
20+
export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
2121

2222
export const PatchType = {
2323
DOCUMENT: "file",

src/patcher/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
export * from "./from-docx";
2+
export * from "./patch-detector";

src/patcher/patch-detector.spec.ts

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2+
import JSZip from "jszip";
3+
import { patchDetector } from "./patch-detector";
4+
5+
const MOCK_XML = `
6+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
7+
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas"
8+
xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex"
9+
xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex"
10+
xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex"
11+
xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex"
12+
xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex"
13+
xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex"
14+
xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex"
15+
xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex"
16+
xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex"
17+
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
18+
xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink"
19+
xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d"
20+
xmlns:o="urn:schemas-microsoft-com:office:office"
21+
xmlns:oel="http://schemas.microsoft.com/office/2019/extlst"
22+
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
23+
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
24+
xmlns:v="urn:schemas-microsoft-com:vml"
25+
xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"
26+
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
27+
xmlns:w10="urn:schemas-microsoft-com:office:word"
28+
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
29+
xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
30+
xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"
31+
xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"
32+
xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"
33+
xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"
34+
xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"
35+
xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"
36+
xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"
37+
xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk"
38+
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
39+
xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
40+
<w:body>
41+
<w:p w14:paraId="2499FE9F" w14:textId="0A3D130F" w:rsidR="00B51233"
42+
w:rsidRDefault="007B52ED" w:rsidP="007B52ED">
43+
<w:pPr>
44+
<w:pStyle w:val="Title" />
45+
</w:pPr>
46+
<w:r>
47+
<w:t>Hello World</w:t>
48+
</w:r>
49+
</w:p>
50+
<w:p w14:paraId="6410D9A0" w14:textId="7579AB49" w:rsidR="007B52ED"
51+
w:rsidRDefault="007B52ED" />
52+
<w:p w14:paraId="57ACF964" w14:textId="315D7A05" w:rsidR="007B52ED"
53+
w:rsidRDefault="007B52ED">
54+
<w:r>
55+
<w:t>Hello {{name}},</w:t>
56+
</w:r>
57+
<w:r w:rsidR="008126CB">
58+
<w:t xml:space="preserve"> how are you?</w:t>
59+
</w:r>
60+
</w:p>
61+
<w:p w14:paraId="38C7DF4A" w14:textId="66CDEC9A" w:rsidR="007B52ED"
62+
w:rsidRDefault="007B52ED" />
63+
<w:p w14:paraId="04FABE2B" w14:textId="3DACA001" w:rsidR="007B52ED"
64+
w:rsidRDefault="007B52ED">
65+
<w:r>
66+
<w:t>{{paragraph_replace}}</w:t>
67+
</w:r>
68+
</w:p>
69+
<w:p w14:paraId="7AD7975D" w14:textId="77777777" w:rsidR="00EF161F"
70+
w:rsidRDefault="00EF161F" />
71+
<w:p w14:paraId="3BD6D75A" w14:textId="19AE3121" w:rsidR="00EF161F"
72+
w:rsidRDefault="00EF161F">
73+
<w:r>
74+
<w:t>{{table}}</w:t>
75+
</w:r>
76+
</w:p>
77+
<w:p w14:paraId="76023962" w14:textId="4E606AB9" w:rsidR="007B52ED"
78+
w:rsidRDefault="007B52ED" />
79+
<w:tbl>
80+
<w:tblPr>
81+
<w:tblStyle w:val="TableGrid" />
82+
<w:tblW w:w="0" w:type="auto" />
83+
<w:tblLook w:val="04A0" w:firstRow="1" w:lastRow="0" w:firstColumn="1"
84+
w:lastColumn="0" w:noHBand="0" w:noVBand="1" />
85+
</w:tblPr>
86+
<w:tblGrid>
87+
<w:gridCol w:w="3003" />
88+
<w:gridCol w:w="3003" />
89+
<w:gridCol w:w="3004" />
90+
</w:tblGrid>
91+
<w:tr w:rsidR="00EF161F" w14:paraId="1DEC5955" w14:textId="77777777" w:rsidTr="00EF161F">
92+
<w:tc>
93+
<w:tcPr>
94+
<w:tcW w:w="3003" w:type="dxa" />
95+
</w:tcPr>
96+
<w:p w14:paraId="54DA5587" w14:textId="625BAC60" w:rsidR="00EF161F"
97+
w:rsidRDefault="00EF161F">
98+
<w:r>
99+
<w:t>{{table_heading_1}}</w:t>
100+
</w:r>
101+
</w:p>
102+
</w:tc>
103+
<w:tc>
104+
<w:tcPr>
105+
<w:tcW w:w="3003" w:type="dxa" />
106+
</w:tcPr>
107+
<w:p w14:paraId="57100910" w14:textId="71FD5616" w:rsidR="00EF161F"
108+
w:rsidRDefault="00EF161F" />
109+
</w:tc>
110+
<w:tc>
111+
<w:tcPr>
112+
<w:tcW w:w="3004" w:type="dxa" />
113+
</w:tcPr>
114+
<w:p w14:paraId="1D388FAB" w14:textId="77777777" w:rsidR="00EF161F"
115+
w:rsidRDefault="00EF161F" />
116+
</w:tc>
117+
</w:tr>
118+
<w:tr w:rsidR="00EF161F" w14:paraId="0F53D2DC" w14:textId="77777777" w:rsidTr="00EF161F">
119+
<w:tc>
120+
<w:tcPr>
121+
<w:tcW w:w="3003" w:type="dxa" />
122+
</w:tcPr>
123+
<w:p w14:paraId="0F2BCCED" w14:textId="3C3B6706" w:rsidR="00EF161F"
124+
w:rsidRDefault="00EF161F">
125+
<w:r>
126+
<w:t>Item: {{item_1}}</w:t>
127+
</w:r>
128+
</w:p>
129+
</w:tc>
130+
<w:tc>
131+
<w:tcPr>
132+
<w:tcW w:w="3003" w:type="dxa" />
133+
</w:tcPr>
134+
<w:p w14:paraId="1E6158AC" w14:textId="77777777" w:rsidR="00EF161F"
135+
w:rsidRDefault="00EF161F" />
136+
</w:tc>
137+
<w:tc>
138+
<w:tcPr>
139+
<w:tcW w:w="3004" w:type="dxa" />
140+
</w:tcPr>
141+
<w:p w14:paraId="17937748" w14:textId="77777777" w:rsidR="00EF161F"
142+
w:rsidRDefault="00EF161F" />
143+
</w:tc>
144+
</w:tr>
145+
<w:tr w:rsidR="00EF161F" w14:paraId="781DAC1A" w14:textId="77777777" w:rsidTr="00EF161F">
146+
<w:tc>
147+
<w:tcPr>
148+
<w:tcW w:w="3003" w:type="dxa" />
149+
</w:tcPr>
150+
<w:p w14:paraId="1DCD0343" w14:textId="77777777" w:rsidR="00EF161F"
151+
w:rsidRDefault="00EF161F" />
152+
</w:tc>
153+
<w:tc>
154+
<w:tcPr>
155+
<w:tcW w:w="3003" w:type="dxa" />
156+
</w:tcPr>
157+
<w:p w14:paraId="5D02E3CD" w14:textId="77777777" w:rsidR="00EF161F"
158+
w:rsidRDefault="00EF161F" />
159+
</w:tc>
160+
<w:tc>
161+
<w:tcPr>
162+
<w:tcW w:w="3004" w:type="dxa" />
163+
</w:tcPr>
164+
<w:p w14:paraId="52EA0DBB" w14:textId="77777777" w:rsidR="00EF161F"
165+
w:rsidRDefault="00EF161F" />
166+
</w:tc>
167+
</w:tr>
168+
</w:tbl>
169+
<w:p w14:paraId="47CD1FBC" w14:textId="23474CBC" w:rsidR="007B52ED"
170+
w:rsidRDefault="007B52ED" />
171+
<w:p w14:paraId="0ACCEE90" w14:textId="67907499" w:rsidR="00EF161F"
172+
w:rsidRDefault="0077578F">
173+
<w:r>
174+
<w:t>{{image_test}}</w:t>
175+
</w:r>
176+
</w:p>
177+
<w:p w14:paraId="23FA9862" w14:textId="77777777" w:rsidR="0077578F"
178+
w:rsidRDefault="0077578F" />
179+
<w:p w14:paraId="01578F2F" w14:textId="3BDC6C85" w:rsidR="007B52ED"
180+
w:rsidRDefault="007B52ED">
181+
<w:r>
182+
<w:t>Thank you</w:t>
183+
</w:r>
184+
</w:p>
185+
<w:sectPr w:rsidR="007B52ED" w:rsidSect="0072043F">
186+
<w:headerReference w:type="default" r:id="rId6" />
187+
<w:footerReference w:type="default" r:id="rId7" />
188+
<w:pgSz w:w="11900" w:h="16840" />
189+
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708"
190+
w:footer="708" w:gutter="0" />
191+
<w:cols w:space="708" />
192+
<w:docGrid w:linePitch="360" />
193+
</w:sectPr>
194+
</w:body>
195+
</w:document>
196+
`;
197+
198+
// Verifies that patchDetector reports the `{{placeholder}}` keys found in a mocked archive.
describe("patch-detector", () => {
    describe("patchDetector", () => {
        describe("document.xml and [Content_Types].xml", () => {
            beforeEach(() => {
                // Build an in-memory archive so no real .docx file is needed;
                // JSZip.loadAsync is stubbed to hand it back regardless of the input data.
                const zip = new JSZip();

                zip.file("word/document.xml", MOCK_XML);
                zip.file("[Content_Types].xml", `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`);

                vi.spyOn(JSZip, "loadAsync").mockResolvedValue(zip);
            });

            afterEach(() => {
                vi.restoreAllMocks();
            });

            it("should detect the patch keys present in the document", async () => {
                const output = await patchDetector({
                    data: Buffer.from(""),
                });
                expect(output).toMatchObject(["name", "paragraph_replace", "table", "image_test", "table_heading_1", "item_1"]);
            });
        });
    });
});

src/patcher/patch-detector.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import JSZip from "jszip";
2+
import { toJson } from "./util";
3+
import { traverse } from "./traverser";
4+
import { InputDataType } from "./from-docx";
5+
6+
/** Options accepted by `patchDetector`. */
type PatchDetectorOptions = {
7+
/** Template contents; passed as-is to `JSZip.loadAsync`. */
readonly data: InputDataType;
8+
};
9+
10+
/**
 * Detects which patch keys (`{{key}}` placeholders) are present in a template.
 *
 * The input is opened with JSZip. Only entries whose name ends in `.xml` or `.rels` are
 * considered, and of those only entries under `word/` that are not `.xml.rels` files are
 * converted with `toJson` and traversed for placeholder text.
 *
 * @param options - Object carrying the template `data` to inspect.
 * @returns The distinct keys found, in the order they were first encountered.
 */
export const patchDetector = async ({ data }: PatchDetectorOptions): Promise<readonly string[]> => {
    const archive = await JSZip.loadAsync(data);
    const detected = new Set<string>();

    for (const [path, entry] of Object.entries(archive.files)) {
        // Anything that is not XML-like is never read as text.
        const isXmlLike = path.endsWith(".xml") || path.endsWith(".rels");
        const isScannablePart = path.startsWith("word/") && !path.endsWith(".xml.rels");
        if (!isXmlLike || !isScannablePart) {
            continue;
        }

        const parsed = toJson(await entry.async("text"));
        for (const paragraph of traverse(parsed)) {
            for (const patchKey of findPatchKeys(paragraph.text)) {
                detected.add(patchKey);
            }
        }
    }

    return Array.from(detected);
};
26+
27+
/**
 * Extracts every placeholder key written as `{{key}}` from a piece of text.
 *
 * The whole `{{...}}` token is matched and its delimiters are then stripped, instead of
 * relying on look-behind/look-ahead assertions: look-behind is not available in every
 * JavaScript engine, and because assertions do not consume the braces, braces belonging to
 * one placeholder could be picked up again by the following match.
 *
 * @param text - Text to scan.
 * @returns The keys in order of appearance (duplicates preserved); empty when none are found.
 */
const findPatchKeys = (text: string): readonly string[] => {
    // `s` lets `.` span line breaks; the lazy quantifier stops at the first closing `}}`.
    const pattern = /\{\{.+?\}\}/gs;
    return (text.match(pattern) ?? []).map((token) => token.slice(2, -2));
};

src/patcher/traverser.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const elementsToWrapper = (wrapper: ElementWrapper): readonly ElementWrapper[] =
1515
parent: wrapper,
1616
})) ?? [];
1717

18-
export const findLocationOfText = (node: Element, text: string): readonly IRenderedParagraphNode[] => {
18+
export const traverse = (node: Element): readonly IRenderedParagraphNode[] => {
1919
let renderedParagraphs: readonly IRenderedParagraphNode[] = [];
2020

2121
// eslint-disable-next-line functional/prefer-readonly-type
@@ -41,5 +41,8 @@ export const findLocationOfText = (node: Element, text: string): readonly IRende
4141
}
4242
}
4343

44-
return renderedParagraphs.filter((p) => p.text.includes(text));
44+
return renderedParagraphs;
4545
};
46+
47+
/**
 * Returns the rendered paragraphs under `node` whose text contains `text`.
 *
 * @param node - Root element handed to `traverse`.
 * @param text - Substring to look for in each paragraph's text.
 * @returns The paragraphs from `traverse(node)` whose text includes `text`.
 */
export const findLocationOfText = (node: Element, text: string): readonly IRenderedParagraphNode[] => {
    const paragraphs = traverse(node);
    return paragraphs.filter((paragraph) => paragraph.text.includes(text));
};

0 commit comments

Comments
 (0)