1
1
package crawler
2
2
3
3
import (
4
- "SecureJS/internal/utils" // 你自己项目里的包路径,如有不同需改
4
+ "SecureJS/internal/utils"
5
5
"fmt"
6
6
"log"
7
7
"strings"
@@ -12,8 +12,6 @@ import (
12
12
"github.com/go-rod/rod/lib/launcher"
13
13
"github.com/go-rod/rod/lib/proto"
14
14
15
- // 确保版本更新到包含 `Inject` 的版本
16
- //"github.com/go-rod/stealth"
17
15
)
18
16
19
17
type CrawlResult struct {
@@ -25,15 +23,21 @@ type CrawlResult struct {
25
23
// -----------------------------------------------------------
26
24
// Crawl multiple URLs concurrently
27
25
// -----------------------------------------------------------
28
- func crawlAll (urls []string , concurrency int ) ([]* CrawlResult , error ) {
26
+ func crawlAll (urls []string , concurrency int , browserPath string ) ([]* CrawlResult , error ) {
29
27
if len (urls ) == 0 {
30
28
return nil , fmt .Errorf ("no URLs provided" )
31
29
}
32
30
if concurrency <= 0 {
33
31
concurrency = 1
34
32
}
35
33
36
- chromePath := launcher .NewBrowser ().MustGet ()
34
+ var chromePath string
35
+ if browserPath != "" {
36
+ chromePath = browserPath
37
+ } else {
38
+ chromePath = launcher .NewBrowser ().MustGet ()
39
+ }
40
+
37
41
u := launcher .New ().
38
42
Bin (chromePath ).
39
43
Headless (true ). // 调试时可设置为 false
@@ -181,8 +185,8 @@ func tryFetchOneURL(browser *rod.Browser, url string, timeout time.Duration) (*C
181
185
// -----------------------------------------------------------
182
186
// Exported entry point: crawls the given URLs to collect links
183
187
// -----------------------------------------------------------
184
- func CollectLinks (urls []string , threads int , uniqueLinks map [string ]struct {}, toParse * []string ) error {
185
- results , err := crawlAll (urls , threads )
188
+ func CollectLinks (urls []string , threads int , uniqueLinks map [string ]struct {}, toParse * []string , browserPath string ) error {
189
+ results , err := crawlAll (urls , threads , browserPath )
186
190
if err != nil {
187
191
return fmt .Errorf ("failed to crawl: %v" , err )
188
192
}
0 commit comments