handle http redirects ourselves, to prevent redirect to localhost

This commit is contained in:
jesopo 2020-04-05 22:41:33 +01:00
parent 223c2c88ff
commit 8a3f10e360
2 changed files with 46 additions and 20 deletions

View file

@ -45,13 +45,9 @@ class Module(ModuleManager.BaseModule):
if not urllib.parse.urlparse(url).scheme:
url = "http://%s" % url
hostname = urllib.parse.urlparse(url).hostname
if not utils.http.host_permitted(hostname):
self.log.warn("Attempted to get forbidden host: %s", [url])
return -1, None
request = utils.http.Request(url, check_hostname=True)
try:
page = utils.http.request(url)
page = utils.http.request(request)
except Exception as e:
self.log.error("failed to get URL title for %s: %s", [url, str(e)])
return -1, None

View file

@ -69,6 +69,7 @@ class Request(object):
json_body: bool = False
allow_redirects: bool = True
check_hostname: bool = False
check_content_type: bool = True
fallback_encoding: typing.Optional[str] = None
content_type: typing.Optional[str] = None
@ -169,24 +170,53 @@ def request(request_obj: typing.Union[str, Request], **kwargs) -> Response:
request_obj = Request(request_obj, **kwargs)
return _request(request_obj)
class HostNameInvalidError(ValueError):
    """Raised when a URL's hostname is missing or is not permitted."""
class TooManyRedirectionsError(Exception):
    """Raised when a request is redirected too many times."""
def _request(request_obj: Request) -> Response:
request_obj.validate()
def _assert_allowed(url: str):
    """Ensure the host named in *url* may be contacted.

    Raises:
        HostNameInvalidError: if the URL has no hostname, or if
            ``host_permitted`` rejects that hostname.
    """
    host = urllib.parse.urlparse(url).hostname
    # Only consult host_permitted when a hostname was actually parsed.
    permitted = host is not None and host_permitted(host)
    if not permitted:
        raise HostNameInvalidError(f"hostname {host} is not permitted")
def _wrap() -> Response:
headers = request_obj.get_headers()
response = requests.request(
request_obj.method,
request_obj.url,
headers=headers,
params=request_obj.get_params,
data=request_obj.get_body(),
allow_redirects=request_obj.allow_redirects,
stream=True,
cookies=request_obj.cookies
)
response_content = response.raw.read(RESPONSE_MAX,
decode_content=True)
if not response.raw.read(1) == b"":
raise ValueError("Response too large")
redirect = 0
current_url = request_obj.url
while True:
if request_obj.check_hostname:
_assert_allowed(current_url)
response = requests.request(
request_obj.method,
current_url,
headers=headers,
params=request_obj.get_params,
data=request_obj.get_body(),
allow_redirects=False,
stream=True,
cookies=request_obj.cookies
)
if response.status_code in [301, 302]:
redirect += 1
if redirect == 5:
raise TooManyRedirectionsError(f"{redirect} redirects")
else:
current_url = response.headers["location"]
continue
response_content = response.raw.read(RESPONSE_MAX,
decode_content=True)
if not response.raw.read(1) == b"":
raise ValueError("Response too large")
break
headers = utils.CaseInsensitiveDict(dict(response.headers))
our_response = Response(response.status_code, response_content,