Skip to content

Multipart body should not be map[string][]byte #849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions _examples/multipart/multipart.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ import (
"github.com/gocolly/colly/v2"
)

// generateFormData builds the example multipart payload: three text fields
// plus the bytes read from gocolly.jpg under the "file" field.
// NOTE(review): this span is a rendered diff hunk without +/- markers. Going
// by the pull-request title, the map[string][]byte signature and map literal
// are presumably the removed side, and the []colly.FormField signature and
// slice literal the added side.
func generateFormData() map[string][]byte {
func generateFormData() []colly.FormField {
// NOTE(review): the errors returned by os.Open and io.ReadAll are assigned to
// "_", so a failure to open or read the image goes unreported.
f, _ := os.Open("gocolly.jpg")
defer f.Close()

imgData, _ := io.ReadAll(f)

// NOTE(review): "[email protected]" looks like an address redacted by the
// page scraper rather than the literal in the repository; confirm upstream.
return map[string][]byte{
"firstname": []byte("one"),
"lastname": []byte("two"),
"email": []byte("[email protected]"),
"file": imgData,
return []colly.FormField{
{Name: "firstname", Value: []byte("one")},
{Name: "lastname", Value: []byte("two")},
{Name: "email", Value: []byte("[email protected]")},
// Only the file entry sets Filename; the text fields leave it empty.
{Name: "file", Value: imgData, Filename: "gocolly.jpg"},
}
}

Expand Down
31 changes: 22 additions & 9 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,13 +552,13 @@ func (c *Collector) PostRaw(URL string, requestData []byte) error {
}

// PostMultipart starts a collector job by creating a Multipart POST request
// with raw binary data. PostMultipart also calls the previously provided callbacks
func (c *Collector) PostMultipart(URL string, requestData map[string][]byte) error {
// with form fields. PostMultipart also calls the previously provided callbacks
func (c *Collector) PostMultipart(URL string, fields []FormField) error {
// NOTE(review): rendered diff hunk without +/- markers; the requestData
// (map) signature and return line are presumably the removed side.
// One boundary is generated per call and used both in the Content-Type
// header and in the body built by createMultipartReader, so the two agree.
boundary := randomBoundary()
hdr := http.Header{}
hdr.Set("Content-Type", "multipart/form-data; boundary="+boundary)
hdr.Set("User-Agent", c.UserAgent)
// The request is handed to c.scrape as a POST with the literal 1 and nil
// where Request.PostMultipart passes r.Depth+1 and r.Ctx.
return c.scrape(URL, "POST", 1, createMultipartReader(boundary, requestData), nil, hdr, true)
return c.scrape(URL, "POST", 1, createMultipartReader(boundary, fields), nil, hdr, true)
}

// Request starts a collector job by creating a custom HTTP request
Expand Down Expand Up @@ -1510,23 +1510,26 @@ func createFormReader(data map[string]string) io.Reader {
return strings.NewReader(form.Encode())
}

// createMultipartReader serializes the given form data into a multipart body
// delimited by boundary and returns a reader over the resulting bytes.
// In the slice-based variant, parts are written in the order of fields, and a
// filename parameter is added only when field.Filename is non-empty.
// NOTE(review): rendered diff hunk without +/- markers; the map-based
// signature, range clause and the lines using contentType/content are
// presumably the removed side.
// NOTE(review): every line is terminated with "\n". RFC 2046 specifies CRLF
// around boundary delimiters and part headers; confirm that target servers
// accept bare LF.
// NOTE(review): field.Name and field.Filename are placed between double
// quotes without escaping, so a '"' inside either value would terminate the
// quoted parameter early.
func createMultipartReader(boundary string, data map[string][]byte) io.Reader {
func createMultipartReader(boundary string, fields []FormField) io.Reader {
dashBoundary := "--" + boundary

body := []byte{}
buffer := bytes.NewBuffer(body)

// This header-like line is written into the body itself, ahead of the first
// boundary delimiter.
buffer.WriteString("Content-type: multipart/form-data; boundary=" + boundary + "\n\n")
for contentType, content := range data {
for _, field := range fields {
buffer.WriteString(dashBoundary + "\n")
buffer.WriteString("Content-Disposition: form-data; name=" + contentType + "\n")
buffer.WriteString(fmt.Sprintf("Content-Length: %d \n\n", len(content)))
buffer.Write(content)
buffer.WriteString("Content-Disposition: form-data; name=\"" + field.Name + "\"")
if field.Filename != "" {
buffer.WriteString("; filename=\"" + field.Filename + "\"")
}
buffer.WriteString("\n")
// NOTE(review): the format string leaves a space between the number and the
// line break; RFC 7578 section 4.8 also limits which header fields a
// multipart/form-data part may carry. Confirm this per-part header is wanted.
buffer.WriteString(fmt.Sprintf("Content-Length: %d \n\n", len(field.Value)))
buffer.Write(field.Value)
buffer.WriteString("\n")
}
// Closing delimiter: the boundary followed by a trailing "--".
buffer.WriteString(dashBoundary + "--\n\n")
return bytes.NewReader(buffer.Bytes())

}

// randomBoundary was borrowed from
Expand Down Expand Up @@ -1615,3 +1618,13 @@ func requestHash(url string, body io.Reader) uint64 {
}
return h.Sum64()
}

// FormField describes one part of a multipart form body. A slice of
// FormField values is what PostMultipart and createMultipartReader consume.
type FormField struct {
	// Name is written as the name parameter of the part's
	// Content-Disposition header.
	Name string

	// Value holds the raw bytes sent as the part's content.
	Value []byte

	// Filename is optional. When it is non-empty, a filename parameter is
	// appended to the part's Content-Disposition header.
	Filename string
}
110 changes: 109 additions & 1 deletion colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,21 @@ func newUnstartedTestServer() *httptest.Server {
mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
if r.Method == "POST" {
w.Header().Set("Content-Type", "text/html")
w.Write([]byte(r.FormValue("name")))
if strings.Contains(r.Header.Get("Content-Type"), "multipart/form-data") {
if err := r.ParseMultipartForm(1024); err != nil {
w.WriteHeader(http.StatusBadRequest)
return
}
// For multipart form, return the first form value
for _, values := range r.MultipartForm.Value {
if len(values) > 0 {
w.Write([]byte(values[0]))
return
}
}
} else {
w.Write([]byte(r.FormValue("name")))
}
}
})

Expand Down Expand Up @@ -1871,3 +1885,97 @@ func TestCollectorPostRetryUnseekable(t *testing.T) {
t.Error("OnResponse Retry was called but BodyUnseekable")
}
}

// TestCollectorPostMultipart posts two plain text fields to the test
// server's /login endpoint and expects no error from PostMultipart and a 200
// status on any response delivered to OnResponse.
func TestCollectorPostMultipart(t *testing.T) {
	srv := newTestServer()
	defer srv.Close()

	collector := NewCollector()
	collector.OnResponse(func(resp *Response) {
		if got := resp.StatusCode; got != 200 {
			t.Errorf("Expected status code 200, got %d", got)
		}
	})

	// Text-only payload: neither field sets Filename.
	payload := []FormField{
		{Name: "text1", Value: []byte("value1")},
		{Name: "text2", Value: []byte("value2")},
	}
	if err := collector.PostMultipart(srv.URL+"/login", payload); err != nil {
		t.Errorf("PostMultipart failed: %v", err)
	}
}

// TestCollectorPostMultipartWithFile posts one text field and one field that
// carries a Filename to the test server's /login endpoint, expecting no error
// from PostMultipart and a 200 status on any response seen by OnResponse.
func TestCollectorPostMultipartWithFile(t *testing.T) {
	srv := newTestServer()
	defer srv.Close()

	collector := NewCollector()
	collector.OnResponse(func(resp *Response) {
		if got := resp.StatusCode; got != 200 {
			t.Errorf("Expected status code 200, got %d", got)
		}
	})

	// The second entry sets Filename, which makes it a file-style part.
	payload := []FormField{
		{Name: "text1", Value: []byte("value1")},
		{Name: "file1", Value: []byte("file content"), Filename: "test.txt"},
	}
	if err := collector.PostMultipart(srv.URL+"/login", payload); err != nil {
		t.Errorf("PostMultipart with file failed: %v", err)
	}
}

// TestCollectorPostMultipartWithDifferentFields sends two multipart POSTs with
// different field sets to the same URL on a dedicated test server and expects
// both calls to return without error.
func TestCollectorPostMultipartWithDifferentFields(t *testing.T) {
	// The handler checks the method, parses the multipart body, and replies
	// with the first non-empty value list it meets while ranging over the
	// parsed form, falling back to the "name" form value.
	handler := func(w http.ResponseWriter, req *http.Request) {
		if req.Method != "POST" {
			t.Errorf("Expected POST request, got %s", req.Method)
		}
		if parseErr := req.ParseMultipartForm(1024); parseErr != nil {
			t.Errorf("Failed to parse multipart form: %v", parseErr)
		}

		reply := ""
		if form := req.MultipartForm; form != nil {
			for _, vals := range form.Value {
				if len(vals) == 0 {
					continue
				}
				reply = vals[0]
				break
			}
		}
		if reply == "" {
			reply = req.FormValue("name")
		}
		w.Write([]byte(reply))
	}

	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	collector := NewCollector()
	target := srv.URL + "/login"

	// First request: credential-style fields.
	credentials := []FormField{
		{Name: "username", Value: []byte("testuser")},
		{Name: "password", Value: []byte("testpass")},
	}
	if err := collector.PostMultipart(target, credentials); err != nil {
		t.Errorf("Failed to post multipart form: %v", err)
	}

	// Second request to the same URL with a different set of field names.
	contact := []FormField{
		{Name: "email", Value: []byte("[email protected]")},
		{Name: "message", Value: []byte("Hello")},
	}
	if err := collector.PostMultipart(target, contact); err != nil {
		t.Errorf("Failed to post multipart form with different fields: %v", err)
	}
}
8 changes: 4 additions & 4 deletions request.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,14 @@ func (r *Request) PostRaw(URL string, requestData []byte) error {
}

// PostMultipart starts a collector job by creating a Multipart POST request
// with raw binary data. PostMultipart also calls the previously provided.
// callbacks
func (r *Request) PostMultipart(URL string, requestData map[string][]byte) error {
// with form fields. PostMultipart preserves the Context of the previous request
// and calls the previously provided callbacks
func (r *Request) PostMultipart(URL string, fields []FormField) error {
// NOTE(review): rendered diff hunk without +/- markers; the requestData
// (map) signature and return line are presumably the removed side.
// One boundary is generated per call and used both in the Content-Type
// header and in the body built by createMultipartReader, so the two agree.
boundary := randomBoundary()
hdr := http.Header{}
hdr.Set("Content-Type", "multipart/form-data; boundary="+boundary)
hdr.Set("User-Agent", r.collector.UserAgent)
// URL is passed through r.AbsoluteURL first; the new request is issued one
// level deeper (r.Depth+1) and reuses r.Ctx.
return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, createMultipartReader(boundary, requestData), r.Ctx, hdr, true)
return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, createMultipartReader(boundary, fields), r.Ctx, hdr, true)
}

// Retry submits HTTP request again with the same parameters
Expand Down