From 078681eddffb3a29b3136ad12ec01bfa25870124 Mon Sep 17 00:00:00 2001 From: jesopo Date: Mon, 8 Jul 2019 12:54:06 +0100 Subject: [PATCH] add missing scheme in utils.http.url_sanitise, use in rss.py --- modules/rss.py | 4 ++-- src/utils/http.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/rss.py b/modules/rss.py index e8c45131..bce673aa 100644 --- a/modules/rss.py +++ b/modules/rss.py @@ -103,7 +103,7 @@ class Module(ModuleManager.BaseModule): if not len(event["args_split"]) > 1: raise utils.EventError("Please provide a URL") - url = event["args_split"][1] + url = utils.http.url_sanitise(event["args_split"][1]) if url in rss_hooks: raise utils.EventError("That URL is already being watched") @@ -119,7 +119,7 @@ class Module(ModuleManager.BaseModule): if not len(event["args_split"]) > 1: raise utils.EventError("Please provide a URL") - url = event["args_split"][1] + url = utils.http.url_sanitise(event["args_split"][1]) if not url in rss_hooks: raise utils.EventError("I'm not watching that URL") rss_hooks.remove(url) diff --git a/src/utils/http.py b/src/utils/http.py index 6ee7fdc3..531c24fc 100644 --- a/src/utils/http.py +++ b/src/utils/http.py @@ -9,6 +9,9 @@ REGEX_URL = re.compile("https?://[A-Z0-9{}]+".format(re.escape("-._~:/%?#[]@!$&' # best-effort tidying up of URLs def url_sanitise(url: str): + if not urllib.parse.urlparse(url).scheme: + url = "http://%s" % url + if url.endswith(")"): # trim ")" from the end only if there's not a "(" to match it # google.com/) -> google.com/