From 471c11e229f117ede97b5578f9107ef3c985f1e8 Mon Sep 17 00:00:00 2001
From: Patrick Nappa
Date: Fri, 3 May 2019 13:43:08 +1000
Subject: [PATCH 1/2] ensure that non-url characters not separated by whitespace aren't consumed

---
 src/utils/http.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/http.py b/src/utils/http.py
index 55e0c96b..ac02c41d 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -4,7 +4,7 @@ import json as _json
 import bs4, netifaces, requests
 from src import utils
 
-REGEX_URL = re.compile("https?://\S+", re.I)
+REGEX_URL = re.compile("https?://[A-Z0-9{}]+".format(re.escape("-._~:/?#[]@!$&'()*+,;=")), re.I)
 USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")
 

From 2c344c9ddd5e12e95d4317bcd0207ca17e8e52b6 Mon Sep 17 00:00:00 2001
From: Patrick Nappa
Date: Fri, 3 May 2019 13:50:51 +1000
Subject: [PATCH 2/2] forgot the beautiful %

---
 src/utils/http.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/http.py b/src/utils/http.py
index ac02c41d..0488b0af 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -4,7 +4,7 @@ import json as _json
 import bs4, netifaces, requests
 from src import utils
 
-REGEX_URL = re.compile("https?://[A-Z0-9{}]+".format(re.escape("-._~:/?#[]@!$&'()*+,;=")), re.I)
+REGEX_URL = re.compile("https?://[A-Z0-9{}]+".format(re.escape("-._~:/%?#[]@!$&'()*+,;=")), re.I)
 USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")
 