File tree 2 files changed +64
-0
lines changed
2 files changed +64
-0
lines changed Original file line number Diff line number Diff line change
1
+ * \~
Original file line number Diff line number Diff line change
1
+ package docx
2
+
3
+ import (
4
+ "archive/zip"
5
+ "encoding/xml"
6
+ "io"
7
+ "strings"
8
+ )
9
+
10
// Body is the decoding target for the <body> element of the document XML.
type Body struct {
	// Paragraph gathers the character data of every element reached through
	// the nested path p>r>t below the body, one slice entry per matched <t>.
	Paragraph []string `xml:"p>r>t"`
}
13
+
14
+ type Document struct {
15
+ XMLName xml.Name `xml:"document"`
16
+ Body Body `xml:"body"`
17
+ }
18
+
19
+ func (d * Document ) Extract (xmlContent string ) error {
20
+ /*
21
+ Extracts the xml elements into their respective struct fields
22
+ */
23
+ return xml .Unmarshal ([]byte (xmlContent ), d )
24
+ }
25
+
26
// UnpackDocx opens the zip archive at filePath and returns the open reader
// together with the list of entries it contains.
//
// The reader is handed back still open, so closing it is left to the caller.
// If the archive cannot be opened, the function panics with the error
// returned by zip.OpenReader.
func UnpackDocx(filePath string) (*zip.ReadCloser, []*zip.File) {
	archive, openErr := zip.OpenReader(filePath)
	if openErr != nil {
		panic(openErr)
	}
	entries := archive.File
	return archive, entries
}
34
+
35
// WordDocToString drains reader and returns everything it yielded as a
// single string.
//
// Only the bytes actually delivered by each read are kept, so the result
// never contains buffer padding or data left over from an earlier chunk.
// Reading stops at end of input or at the first read error. Because the
// signature has no error result, a read error is not reported and the text
// gathered up to that point is returned.
func WordDocToString(reader io.Reader) (content string) {
	var buf strings.Builder
	// The copy error is dropped deliberately: there is no error result to
	// carry it, and stopping here (rather than retrying) guarantees the call
	// terminates on a persistently failing reader.
	_, _ = io.Copy(&buf, reader)
	return buf.String()
}
52
+
53
// RetrieveWordDoc scans files for the entry named "word/document.xml" and
// returns it. The whole slice is always walked, so when several entries carry
// that name the last one is returned; when none does, the result is nil.
func RetrieveWordDoc(files []*zip.File) (file *zip.File) {
	const target = "word/document.xml"
	for i := 0; i < len(files); i++ {
		entry := files[i]
		if entry.Name != target {
			continue
		}
		file = entry
	}
	return file
}
You can’t perform that action at this time.
0 commit comments