Skip to content

Add checks to prevent De-AMP loops #13213

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 32 additions & 6 deletions components/de_amp/browser/de_amp_throttle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <utility>

#include "base/feature_list.h"
#include "base/strings/stringprintf.h"
#include "brave/components/de_amp/browser/de_amp_url_loader.h"
#include "brave/components/de_amp/common/features.h"
#include "brave/components/de_amp/common/pref_names.h"
Expand All @@ -27,6 +28,8 @@

namespace de_amp {

constexpr char kDeAmpHeaderName[] = "X-Brave-De-AMP";

// static
std::unique_ptr<DeAmpThrottle> DeAmpThrottle::MaybeCreateThrottleFor(
scoped_refptr<base::SequencedTaskRunner> task_runner,
Expand All @@ -52,14 +55,28 @@ DeAmpThrottle::DeAmpThrottle(
scoped_refptr<base::SequencedTaskRunner> task_runner,
const network::ResourceRequest& request,
const content::WebContents::Getter& wc_getter)
: task_runner_(task_runner), request_(request), wc_getter_(wc_getter) {}
: task_runner_(task_runner),
request_(request),
is_amp_redirect_(false),
wc_getter_(wc_getter) {}

DeAmpThrottle::~DeAmpThrottle() = default;

void DeAmpThrottle::WillStartRequest(network::ResourceRequest* request,
bool* defer) {
if (request->headers.HasHeader(kDeAmpHeaderName)) {
is_amp_redirect_ = true;
request->headers.RemoveHeader(kDeAmpHeaderName);
}
}

void DeAmpThrottle::WillProcessResponse(
const GURL& response_url,
network::mojom::URLResponseHead* response_head,
bool* defer) {
if (is_amp_redirect_)
return;

VLOG(2) << "deamp throttling: " << response_url;
*defer = true;

Expand All @@ -76,21 +93,29 @@ void DeAmpThrottle::WillProcessResponse(
std::move(new_remote), std::move(new_receiver), de_amp_loader);
}

void DeAmpThrottle::Redirect(const GURL& new_url, const GURL& response_url) {
bool DeAmpThrottle::OpenCanonicalURL(const GURL& new_url,
const GURL& response_url) {
auto* contents = wc_getter_.Run();

if (!contents)
return;
return false;

auto* entry = contents->GetController().GetPendingEntry();
if (!entry) {
if (contents->GetController().GetVisibleEntry()) {
entry = contents->GetController().GetVisibleEntry();
} else {
return;
return false;
}
}

// If the canonical/target URL is the same as the current pending URL being
// navigated to, we should stop De-AMPing. This is done to prevent redirect
// loops. https://github.com/brave/brave-browser/issues/22610
if (new_url == entry->GetURL()) {
return false;
}

DCHECK(entry->GetURL() == response_url);

delegate_->CancelWithError(net::ERR_ABORTED);
Expand All @@ -104,10 +129,10 @@ void DeAmpThrottle::Redirect(const GURL& new_url, const GURL& response_url) {

params.initiator_origin = request_.request_initiator;
params.user_gesture = request_.has_user_gesture;

auto redirect_chain = request_.navigation_redirect_chain;
DCHECK(redirect_chain.size());
redirect_chain.pop_back();
params.redirect_chain = std::move(redirect_chain);
params.extra_headers += base::StringPrintf("%s: true\r\n", kDeAmpHeaderName);

base::SequencedTaskRunnerHandle::Get()->PostTask(
FROM_HERE, base::BindOnce(
Expand All @@ -118,6 +143,7 @@ void DeAmpThrottle::Redirect(const GURL& new_url, const GURL& response_url) {
web_contents->OpenURL(params);
},
contents->GetWeakPtr(), std::move(params)));
return true;
}

} // namespace de_amp
5 changes: 4 additions & 1 deletion components/de_amp/browser/de_amp_throttle.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,18 @@ class DeAmpThrottle : public body_sniffer::BodySnifferThrottle {
const content::WebContents::Getter& wc_getter);

// Implements blink::URLLoaderThrottle.
void WillStartRequest(network::ResourceRequest* request,
bool* defer) override;
void WillProcessResponse(const GURL& response_url,
network::mojom::URLResponseHead* response_head,
bool* defer) override;

void Redirect(const GURL& new_url, const GURL& response_url);
bool OpenCanonicalURL(const GURL& new_url, const GURL& response_url);

private:
scoped_refptr<base::SequencedTaskRunner> task_runner_;
network::ResourceRequest request_;
bool is_amp_redirect_;
content::WebContents::Getter wc_getter_;
base::WeakPtrFactory<DeAmpThrottle> weak_factory_{this};
};
Expand Down
5 changes: 4 additions & 1 deletion components/de_amp/browser/de_amp_url_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,11 @@ bool DeAmpURLLoader::MaybeRedirectToCanonicalLink() {
return false;
}
VLOG(2) << __func__ << " de-amping and loading " << canonical_url;
if (!de_amp_throttle_->OpenCanonicalURL(canonical_url, response_url_)) {
return false;
}
// Only abort if we know we're successfully going to the canonical URL
Abort();
de_amp_throttle_->Redirect(canonical_url, response_url_);
return true;
} else {
// Did not find AMP page and/or canonical link, load original
Expand Down
116 changes: 93 additions & 23 deletions components/de_amp/browser/test/de_amp_browsertest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "content/public/test/test_navigation_observer.h"
#include "net/base/net_errors.h"
#include "net/dns/mock_host_resolver.h"
#include "net/http/http_status_code.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "net/test/embedded_test_server/http_request.h"
#include "net/test/embedded_test_server/http_response.h"
Expand All @@ -35,7 +36,9 @@
#endif

const char kTestHost[] = "a.test.com";
const char kTestAmpPage[] = "/test.html";
const char kTestAmpPage[] = "/test";
const char kTestRedirectingAmpPage1[] = "/redirecting_amp_page_1";
const char kTestRedirectingAmpPage2[] = "/redirecting_amp_page_2";
const char kTestSimpleNonAmpPage[] = "/simple";
const char kTestCanonicalPage[] = "/simple_canonical";
const char kTestAmpBody[] =
Expand Down Expand Up @@ -137,10 +140,9 @@ class DeAmpBrowserTest : public InProcessBrowserTest {
PrefService* prefs_;
};

std::unique_ptr<net::test_server::HttpResponse> HandleRequest(
const std::string& base_url,
const std::string& canonical_page,
std::unique_ptr<net::test_server::HttpResponse> BuildHttpResponseForAmpPage(
const std::string& body,
const std::string& canonical_link,
const net::test_server::HttpRequest& request) {
const auto port = request.GetURL().port();

Expand All @@ -150,14 +152,54 @@ std::unique_ptr<net::test_server::HttpResponse> HandleRequest(
// ... rel="canonical" href="https://%s:%s%s" becomes
// href="https://<test server base url>:<test port>/<canonical html page>"
http_response->set_content(base::StringPrintf(
body.c_str(), base_url.c_str(), port.c_str(), canonical_page.c_str()));
body.c_str(), kTestHost, port.c_str(), canonical_link.c_str()));
return http_response;
}

std::unique_ptr<net::test_server::HttpResponse> HandleRequestForRedirectTest(
const std::string& body,
const net::test_server::HttpRequest& request) {
if (request.relative_url == kTestRedirectingAmpPage1) {
return BuildHttpResponseForAmpPage(body, kTestRedirectingAmpPage2, request);
} else if (request.relative_url == kTestRedirectingAmpPage2) {
return BuildHttpResponseForAmpPage(body, kTestRedirectingAmpPage1, request);
}
return nullptr;
}

std::unique_ptr<net::test_server::HttpResponse> HandleServerRedirect(
net::HttpStatusCode code,
const std::string& source,
const std::string& dest,
const net::test_server::HttpRequest& request) {
GURL request_url = request.GetURL();

if (request_url.path() == source) {
auto http_response =
std::make_unique<net::test_server::BasicHttpResponse>();
http_response->set_code(code);
http_response->AddCustomHeader("Location", dest);
http_response->set_content_type("text/html");
http_response->set_content(base::StringPrintf(
"<html><head></head><body>Redirecting to %s</body></html>",
dest.c_str()));
return http_response;
} else {
return nullptr;
}
}

std::unique_ptr<net::test_server::HttpResponse> HandleRequest(
const std::string& canonical_link,
const std::string& body,
const net::test_server::HttpRequest& request) {
return BuildHttpResponseForAmpPage(body, canonical_link, request);
}

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, SimpleDeAmp) {
TogglePref(true);
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestSimpleNonAmpPage, kTestNonAmpBody));
HandleRequest, kTestSimpleNonAmpPage, kTestNonAmpBody));

ASSERT_TRUE(https_server_->Start());

Expand All @@ -169,14 +211,42 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, SimpleDeAmp) {
// Now go to an AMP page
https_server_.reset(new net::EmbeddedTestServer(
net::test_server::EmbeddedTestServer::TYPE_HTTPS));
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestCanonicalPage, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestCanonicalPage, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url = https_server_->GetURL(kTestHost, kTestAmpPage);
const GURL landing_url = https_server_->GetURL(kTestHost, kTestCanonicalPage);
NavigateToURLAndWaitForRedirects(original_url, landing_url);
}

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, AmpPagesPointingAtEachOther) {
TogglePref(true);
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequestForRedirectTest, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url =
https_server_->GetURL(kTestHost, kTestRedirectingAmpPage1);
const GURL landing_url =
https_server_->GetURL(kTestHost, kTestRedirectingAmpPage2);
NavigateToURLAndWaitForRedirects(original_url, landing_url);
}

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, CanonicalRedirectsToAmp) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs to start from the original amp page to properly replicate the issue as mentioned in DM and on the issue

TogglePref(true);
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestCanonicalPage, kTestAmpBody));
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleServerRedirect, net::HttpStatusCode::HTTP_PERMANENT_REDIRECT,
kTestCanonicalPage, kTestAmpPage));
ASSERT_TRUE(https_server_->Start());

const GURL amp_url = https_server_->GetURL(kTestHost, kTestAmpPage);
const GURL canonical_url =
https_server_->GetURL(kTestHost, kTestCanonicalPage);

NavigateToURLAndWaitForRedirects(amp_url, canonical_url);
}

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, NonHttpScheme) {
TogglePref(true);
const char nonHttpSchemeBody[] =
Expand All @@ -188,25 +258,25 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, NonHttpScheme) {
</head></html>
)";
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestCanonicalPage, nonHttpSchemeBody));
HandleRequest, kTestCanonicalPage, nonHttpSchemeBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url = https_server_->GetURL(kTestHost, kTestAmpPage);
NavigateToURLAndWaitForRedirects(original_url, original_url);
}

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, CanonicalLinkSameAsAmpPage) {
TogglePref(true);
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestAmpPage, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestAmpPage, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url = https_server_->GetURL(kTestHost, kTestAmpPage);
NavigateToURLAndWaitForRedirects(original_url, original_url);
}

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, PrefOff) {
TogglePref(false);
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestCanonicalPage, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestCanonicalPage, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url = https_server_->GetURL(kTestHost, kTestAmpPage);
// Doesn't get De-AMPed
Expand All @@ -215,8 +285,8 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, PrefOff) {

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, RestoreTab) {
TogglePref(true);
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestCanonicalPage, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestCanonicalPage, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url = https_server_->GetURL(kTestHost, kTestAmpPage);
const GURL landing_url = https_server_->GetURL(kTestHost, kTestCanonicalPage);
Expand All @@ -240,7 +310,7 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, RestoreTab) {
IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, AmpURLNotStoredInHistory) {
TogglePref(true);
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestSimpleNonAmpPage, kTestNonAmpBody));
HandleRequest, kTestSimpleNonAmpPage, kTestNonAmpBody));

ASSERT_TRUE(https_server_->Start());

Expand All @@ -251,8 +321,8 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, AmpURLNotStoredInHistory) {
// Now go to an AMP page
https_server_.reset(new net::EmbeddedTestServer(
net::test_server::EmbeddedTestServer::TYPE_HTTPS));
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestCanonicalPage, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestCanonicalPage, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());

const GURL original_url1 = https_server_->GetURL(kTestHost, kTestAmpPage);
Expand All @@ -264,8 +334,8 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, AmpURLNotStoredInHistory) {
const std::string another_canonical_page = "/simple_canonical2.html";
https_server_.reset(new net::EmbeddedTestServer(
net::test_server::EmbeddedTestServer::TYPE_HTTPS));
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, another_canonical_page, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, another_canonical_page, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());

const GURL original_url2 = https_server_->GetURL(kTestHost, kTestAmpPage);
Expand All @@ -290,7 +360,7 @@ IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, AmpURLNotStoredInHistory) {
IN_PROC_BROWSER_TEST_F(DeAmpBrowserTest, NonDeAmpedPageSameAsOriginal) {
TogglePref(true);
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestSimpleNonAmpPage, kTestNonAmpBody));
HandleRequest, kTestSimpleNonAmpPage, kTestNonAmpBody));
ASSERT_TRUE(https_server_->Start());

const GURL original_url =
Expand Down Expand Up @@ -323,8 +393,8 @@ class DeAmpBrowserTestBaseFeatureDisabled : public DeAmpBrowserTest {
};

IN_PROC_BROWSER_TEST_F(DeAmpBrowserTestBaseFeatureDisabled, DoesNotDeAmp) {
https_server_->RegisterRequestHandler(base::BindRepeating(
HandleRequest, kTestHost, kTestCanonicalPage, kTestAmpBody));
https_server_->RegisterRequestHandler(
base::BindRepeating(HandleRequest, kTestCanonicalPage, kTestAmpBody));
ASSERT_TRUE(https_server_->Start());
const GURL original_url = https_server_->GetURL(kTestHost, kTestAmpPage);
// Doesn't get De-AMPed
Expand Down