Skip to content

Commit 9a04ae9

Browse files
committed
initial commit
0 parents  commit 9a04ae9

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*~

docx.go

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package docx
2+
3+
import (
4+
"archive/zip"
5+
"encoding/xml"
6+
"io"
7+
"strings"
8+
)
9+
10+
// Body maps the body element of a Word document's main XML part.
type Body struct {
	// Paragraph collects the character data of every t element found under
	// p>r inside the body, in document order. Despite the field name, each
	// entry is the text of a single t element, not of a whole paragraph.
	Paragraph []string `xml:"p>r>t"`
}
13+
14+
type Document struct {
15+
XMLName xml.Name `xml:"document"`
16+
Body Body `xml:"body"`
17+
}
18+
19+
func (d *Document) Extract(xmlContent string) error {
20+
/*
21+
Extracts the xml elements into their respective struct fields
22+
*/
23+
return xml.Unmarshal([]byte(xmlContent), d)
24+
}
25+
26+
// UnpackDocx opens the zip archive at filePath and returns the open reader
// together with the list of files it contains.
//
// The caller owns the returned reader and must Close it when finished with
// the archive and its files.
//
// UnpackDocx panics if the archive cannot be opened, because the signature
// has no error result to report the failure through.
func UnpackDocx(filePath string) (*zip.ReadCloser, []*zip.File) {
	reader, err := zip.OpenReader(filePath)
	if err != nil {
		panic(err)
	}
	return reader, reader.File
}
34+
35+
// WordDocToString reads reader until it is exhausted and returns everything
// that was read as a single string.
//
// The signature has no error result, so a read failure cannot be reported:
// reading stops at the first error and the bytes received up to that point
// are returned.
func WordDocToString(reader io.Reader) (content string) {
	var buf strings.Builder
	// io.Copy appends only the bytes each Read actually produced and stops on
	// end of input or on any other error, so the result never carries buffer
	// padding or stale bytes, and a failing reader cannot spin forever. The
	// error is dropped on purpose: there is no way to return it.
	_, _ = io.Copy(&buf, reader)
	return buf.String()
}
52+
53+
// RetrieveWordDoc returns the entry of files whose name is exactly
// "word/document.xml".
//
// If several entries carry that name the last one in files is returned; if
// none does, the result is nil.
func RetrieveWordDoc(files []*zip.File) (file *zip.File) {
	const documentPath = "word/document.xml"

	// Scan from the end so the last matching entry wins while still allowing
	// an early exit on the first hit.
	for i := len(files) - 1; i >= 0; i-- {
		if files[i].Name == documentPath {
			return files[i]
		}
	}
	return nil
}

0 commit comments

Comments
 (0)