|
1 | 1 | package site
|
2 | 2 |
|
3 | 3 | import (
|
| 4 | + "bytes" |
4 | 5 | "fmt"
|
5 | 6 | "io"
|
| 7 | + "io/ioutil" |
6 | 8 | "net/url"
|
7 | 9 | "regexp"
|
8 | 10 | "strings"
|
9 | 11 |
|
10 |
| - "github.com/ma6254/FictionDown/utils" |
11 |
| - |
12 | 12 | "github.com/antchfx/htmlquery"
|
13 | 13 | "github.com/ma6254/FictionDown/store"
|
| 14 | + "github.com/ma6254/FictionDown/utils" |
14 | 15 | "golang.org/x/text/encoding/simplifiedchinese"
|
15 | 16 | "golang.org/x/text/transform"
|
16 | 17 | )
|
17 | 18 |
|
| 19 | +func init() { |
| 20 | + addSite(SiteA{ |
| 21 | + Name: "新八一中文网", |
| 22 | + HomePage: "https://www.81new.net/", |
| 23 | + Match: []string{ |
| 24 | + `https://www\.81new\.net/\d+/\d+/`, |
| 25 | + `https://www\.81new\.net/\d+/\d+/d+\.html`, |
| 26 | + }, |
| 27 | + BookInfo: wwww81newcomBookInfo, |
| 28 | + Chapter: func(body io.Reader) ([]string, error) { |
| 29 | + doc, err := htmlquery.Parse(body) |
| 30 | + if err != nil { |
| 31 | + return nil, err |
| 32 | + } |
| 33 | + |
| 34 | + M := []string{} |
| 35 | + //list |
| 36 | + // nodeContent := htmlquery.Find(doc, `//div[@id="content"]/text()`) |
| 37 | + nodeContent := htmlquery.Find(doc, `//*[@id="articlecontent"]/text()`) |
| 38 | + if len(nodeContent) == 0 { |
| 39 | + err = fmt.Errorf("No matching content") |
| 40 | + return nil, err |
| 41 | + } |
| 42 | + for _, v := range nodeContent { |
| 43 | + t := htmlquery.InnerText(v) |
| 44 | + t = strings.TrimSpace(t) |
| 45 | + |
| 46 | + switch t { |
| 47 | + case |
| 48 | + "[八一中文网 请记住", |
| 49 | + "手机版访问 m.81new.com 绿色无弹窗]", |
| 50 | + "": |
| 51 | + continue |
| 52 | + } |
| 53 | + |
| 54 | + M = append(M, t) |
| 55 | + } |
| 56 | + |
| 57 | + return M, nil |
| 58 | + }, |
| 59 | + Search: func(s string) (result []ChaperSearchResult, err error) { |
| 60 | + baseurl, err := url.Parse("https://www.81new.net/modules/article/search.php") |
| 61 | + if err != nil { |
| 62 | + return |
| 63 | + } |
| 64 | + value := baseurl.Query() |
| 65 | + gbk_word, _ := simplifiedchinese.GBK.NewEncoder().String(s) |
| 66 | + value.Add("searchkey", gbk_word) |
| 67 | + baseurl.RawQuery = value.Encode() |
| 68 | + |
| 69 | + // Get WebPage |
| 70 | + |
| 71 | + resp, err := utils.RequestGet(baseurl.String()) |
| 72 | + if err != nil { |
| 73 | + return nil, err |
| 74 | + } |
| 75 | + defer resp.Body.Close() |
| 76 | + |
| 77 | + bodyBytes, err := ioutil.ReadAll(resp.Body) |
| 78 | + if err != nil { |
| 79 | + return nil, err |
| 80 | + } |
| 81 | + var body io.Reader = bytes.NewReader(bodyBytes) |
| 82 | + encode := detectContentCharset(bytes.NewReader(bodyBytes)) |
| 83 | + body = transform.NewReader(body, encode.NewDecoder()) |
| 84 | + |
| 85 | + if regexp.MustCompile(`/modules/article/search\.php`).MatchString(resp.Request.URL.Path) { |
| 86 | + // 多个搜索结果 |
| 87 | + doc, err := htmlquery.Parse(body) |
| 88 | + if err != nil { |
| 89 | + return nil, err |
| 90 | + } |
| 91 | + r := htmlquery.Find(doc, `//table[@id="author"]/tbody/tr`) |
| 92 | + if len(r) == 0 { |
| 93 | + return nil, nil |
| 94 | + } |
| 95 | + for _, v := range r[1:] { |
| 96 | + a := htmlquery.FindOne(v, `/*[1]/a`) |
| 97 | + r := ChaperSearchResult{ |
| 98 | + BookName: htmlquery.InnerText(a), |
| 99 | + Author: htmlquery.InnerText(htmlquery.FindOne(v, `/*[3]`)), |
| 100 | + BookURL: htmlquery.SelectAttr(a, "href"), |
| 101 | + } |
| 102 | + result = append(result, r) |
| 103 | + } |
| 104 | + } else if regexp.MustCompile(`/\d+/\d+/*`).MatchString(resp.Request.URL.Path) { |
| 105 | + // 单个搜索结果 |
| 106 | + store, err := wwww81newcomBookInfo(body) |
| 107 | + if err != nil { |
| 108 | + return nil, err |
| 109 | + } |
| 110 | + result = append(result, ChaperSearchResult{ |
| 111 | + BookName: store.BookName, |
| 112 | + Author: store.Author, |
| 113 | + BookURL: resp.Request.URL.String(), |
| 114 | + }) |
| 115 | + } |
| 116 | + |
| 117 | + return |
| 118 | + }, |
| 119 | + }) |
| 120 | +} |
| 121 | + |
18 | 122 | func wwww81newcomBookInfo(body io.Reader) (s *store.Store, err error) {
|
19 | 123 | doc, err := htmlquery.Parse(body)
|
20 | 124 | if err != nil {
|
@@ -76,98 +180,3 @@ func wwww81newcomBookInfo(body io.Reader) (s *store.Store, err error) {
|
76 | 180 |
|
77 | 181 | return
|
78 | 182 | }
|
79 |
| - |
80 |
| -var wwww81newcom = SiteA{ |
81 |
| - Name: "新八一中文网", |
82 |
| - HomePage: "https://www.81new.com/", |
83 |
| - Match: []string{ |
84 |
| - `https://www\.81new\.com/\d+/\d+/`, |
85 |
| - `https://www\.81new\.com/\d+/\d+/d+\.html`, |
86 |
| - `https://www\.81new\.net/\d+/\d+/`, |
87 |
| - `https://www\.81new\.net/\d+/\d+/d+\.html`, |
88 |
| - }, |
89 |
| - BookInfo: wwww81newcomBookInfo, |
90 |
| - Chapter: func(body io.Reader) ([]string, error) { |
91 |
| - doc, err := htmlquery.Parse(body) |
92 |
| - if err != nil { |
93 |
| - return nil, err |
94 |
| - } |
95 |
| - |
96 |
| - M := []string{} |
97 |
| - //list |
98 |
| - // nodeContent := htmlquery.Find(doc, `//div[@id="content"]/text()`) |
99 |
| - nodeContent := htmlquery.Find(doc, `//*[@id="articlecontent"]/text()`) |
100 |
| - if len(nodeContent) == 0 { |
101 |
| - err = fmt.Errorf("No matching content") |
102 |
| - return nil, err |
103 |
| - } |
104 |
| - for _, v := range nodeContent { |
105 |
| - t := htmlquery.InnerText(v) |
106 |
| - t = strings.TrimSpace(t) |
107 |
| - |
108 |
| - switch t { |
109 |
| - case |
110 |
| - "[八一中文网 请记住", |
111 |
| - "手机版访问 m.81new.com 绿色无弹窗]", |
112 |
| - "": |
113 |
| - continue |
114 |
| - } |
115 |
| - |
116 |
| - M = append(M, t) |
117 |
| - } |
118 |
| - |
119 |
| - return M, nil |
120 |
| - }, |
121 |
| - Search: func(s string) (result []ChaperSearchResult, err error) { |
122 |
| - baseurl, err := url.Parse("https://www.81new.com/modules/article/search.php") |
123 |
| - if err != nil { |
124 |
| - return |
125 |
| - } |
126 |
| - value := baseurl.Query() |
127 |
| - gbk_word, _ := simplifiedchinese.GBK.NewEncoder().String(s) |
128 |
| - value.Add("searchkey", gbk_word) |
129 |
| - baseurl.RawQuery = value.Encode() |
130 |
| - |
131 |
| - // Get WebPage |
132 |
| - |
133 |
| - resp, err := utils.RequestGet(baseurl.String()) |
134 |
| - if err != nil { |
135 |
| - return nil, err |
136 |
| - } |
137 |
| - defer resp.Body.Close() |
138 |
| - if regexp.MustCompile(`/modules/article/search\.php`).MatchString(resp.Request.URL.Path) { |
139 |
| - // 多个搜索结果 |
140 |
| - body := transform.NewReader(resp.Body, simplifiedchinese.GBK.NewDecoder()) |
141 |
| - doc, err := htmlquery.Parse(body) |
142 |
| - if err != nil { |
143 |
| - return nil, err |
144 |
| - } |
145 |
| - r := htmlquery.Find(doc, `//table[@id="author"]/tbody/tr`) |
146 |
| - if len(r) == 0 { |
147 |
| - return nil, nil |
148 |
| - } |
149 |
| - for _, v := range r[1:] { |
150 |
| - a := htmlquery.FindOne(v, `/*[1]/a`) |
151 |
| - r := ChaperSearchResult{ |
152 |
| - BookName: htmlquery.InnerText(a), |
153 |
| - Author: htmlquery.InnerText(htmlquery.FindOne(v, `/*[3]`)), |
154 |
| - BookURL: htmlquery.SelectAttr(a, "href"), |
155 |
| - } |
156 |
| - result = append(result, r) |
157 |
| - } |
158 |
| - } else if regexp.MustCompile(`/\d+/\d+/*`).MatchString(resp.Request.URL.Path) { |
159 |
| - // 单个搜索结果 |
160 |
| - store, err := wwww81newcomBookInfo(resp.Body) |
161 |
| - if err != nil { |
162 |
| - return nil, err |
163 |
| - } |
164 |
| - result = append(result, ChaperSearchResult{ |
165 |
| - BookName: store.BookName, |
166 |
| - Author: store.Author, |
167 |
| - BookURL: resp.Request.URL.String(), |
168 |
| - }) |
169 |
| - } |
170 |
| - |
171 |
| - return |
172 |
| - }, |
173 |
| -} |
|
0 commit comments