Skip to content

Commit 577876c

Browse files
committed
add proxy support
1 parent 2aa7fee commit 577876c

File tree

3 files changed

+48
-5
lines changed

3 files changed

+48
-5
lines changed

cmd/root.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,7 @@ func init() {
5454
rootCmd.Flags().StringSliceP("include", "i", []string{}, "include only domains / ex : -i google.com,facebook.com")
5555

5656
rootCmd.Flags().IntP("max-concurrency", "m", 1000, "max concurrent tasks / ex: -m 10")
57+
58+
rootCmd.Flags().StringP("proxy", "p", "", "proxy to use / ex: -p http://proxy.com:8080")
59+
5760
}

core/crawler.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,20 +85,29 @@ type Crawler struct {
8585
}
8686

8787
func NewCrawler(options *shared.Options) *Crawler {
88-
return &Crawler{
88+
crawler := Crawler{
8989
RootURL: options.URL,
9090
Level: options.Level,
9191
LiveMode: options.LiveMode,
9292
ExportFile: options.ExportFile,
9393
RegexMap: options.RegexMap,
9494
ExcludedStatus: options.StatusResponses,
9595
IncludedUrls: options.IncludedUrls,
96-
Client: &http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}},
9796
Cache: Cache{
9897
Visited: make(map[string]bool),
9998
},
10099
MaxConcurrency: options.MaxConcurrency,
101100
}
101+
transport := &http.Transport{
102+
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
103+
}
104+
if options.Proxy != nil {
105+
transport.Proxy = http.ProxyURL(options.Proxy)
106+
}
107+
crawler.Client = &http.Client{
108+
Transport: transport,
109+
}
110+
return &crawler
102111
}
103112

104113
func (c *Crawler) Fetch(page *webtree.Page) {
@@ -132,9 +141,7 @@ func (c *Crawler) Fetch(page *webtree.Page) {
132141
func (c *Crawler) ExtractLinks(page *webtree.Page) (links []string) {
133142
regex := regexp.MustCompile(GeneralRegex)
134143
generalUrlMatches := regex.FindAllString(page.GetData(), -1)
135-
for _, link := range generalUrlMatches {
136-
links = append(links, link)
137-
}
144+
links = append(links, generalUrlMatches...)
138145
hrefRegex := regexp.MustCompile(HrefRegex)
139146
hrefMatches := hrefRegex.FindAllStringSubmatch(page.GetData(), -1)
140147
for _, match := range hrefMatches {

shared/options.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package shared
33
import (
44
"fmt"
55
"net"
6+
urltool "net/url"
67
"reflect"
78
"regexp"
89
"strings"
@@ -20,6 +21,7 @@ type Options struct {
2021
StatusResponses []int `name:"exclude codes"`
2122
IncludedUrls []string `name:"include"`
2223
MaxConcurrency int `name:"max concurrency"`
24+
Proxy *urltool.URL `name:"proxy"`
2325
}
2426

2527
func (o Options) BuildOptionBanner() string {
@@ -112,6 +114,16 @@ func (o Options) BuildOptionBanner() string {
112114
banner += color.CyanString(" %s: %s", k, v)
113115
banner += "\n"
114116
}
117+
} else {
118+
119+
banner += color.RedString("│")
120+
banner += color.BlueString(name + ": ")
121+
if fmt.Sprintf("%v", value) == "<nil>" {
122+
banner += color.CyanString("not set")
123+
} else {
124+
banner += color.CyanString("%v", value)
125+
}
126+
banner += "\n"
115127
}
116128

117129
}
@@ -189,6 +201,26 @@ func ValidateThenBuildOption(cmd *cobra.Command) (*Options, error) {
189201
return nil, err
190202
}
191203

204+
proxy, err := cmd.Flags().GetString("proxy")
205+
if err != nil {
206+
return nil, err
207+
}
208+
209+
var parsedProxy *urltool.URL
210+
if proxy != "" {
211+
parsedProxy, err = urltool.Parse(proxy)
212+
if err != nil {
213+
return nil, err
214+
}
215+
216+
} else {
217+
parsedProxy = nil
218+
}
219+
// set max concurrency to 1 if live mode is enabled
220+
if liveMode {
221+
maxConcurrency = 1
222+
}
223+
192224
options := &Options{
193225
URL: url,
194226
Level: level,
@@ -198,6 +230,7 @@ func ValidateThenBuildOption(cmd *cobra.Command) (*Options, error) {
198230
StatusResponses: excludedStatus,
199231
IncludedUrls: includedUrls,
200232
MaxConcurrency: maxConcurrency,
233+
Proxy: parsedProxy,
201234
}
202235
options.ManipulateData()
203236
return options, nil

0 commit comments

Comments
 (0)