From 7643a962bd29504bf48a23383109b7642700a349 Mon Sep 17 00:00:00 2001
From: jesopo
Date: Thu, 25 Apr 2019 15:58:58 +0100
Subject: [PATCH] Refuse to get the title for any url that points locally

---
 modules/title.py  | 10 +++++++++-
 src/utils/http.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/modules/title.py b/modules/title.py
index ddc0a50f..f1ea2b30 100644
--- a/modules/title.py
+++ b/modules/title.py
@@ -1,4 +1,4 @@
-import hashlib, re
+import hashlib, re, urllib.parse
 from src import EventManager, ModuleManager, utils
 
 @utils.export("channelset", {"setting": "auto-title",
@@ -14,6 +14,14 @@ class Module(ModuleManager.BaseModule):
             ).hexdigest()
 
     def _get_title(self, url):
+        if not urllib.parse.urlparse(url).scheme:
+            url = "http://%s" % url
+
+        hostname = urllib.parse.urlparse(url).hostname
+        if utils.http.is_localhost(hostname):
+            self.log.warn("tried to get title of localhost: %s", [url])
+            return None
+
         try:
             page = utils.http.request(url, soup=True)
         except utils.http.HTTPWrongContentTypeException:
diff --git a/src/utils/http.py b/src/utils/http.py
index 19f7fc20..09a47607 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -1,6 +1,7 @@
-import re, signal, traceback, typing, urllib.error, urllib.parse
+import ipaddress, re, signal, socket, traceback, typing
+import urllib.error, urllib.parse
 import json as _json
-import bs4, requests
+import bs4, netifaces, requests
 from src import utils
 
 REGEX_URL = re.compile("https?://\S+", re.I)
@@ -87,3 +88,59 @@ def request(url: str, method: str="GET", get_params: dict={},
 def strip_html(s: str) -> str:
     return bs4.BeautifulSoup(s, "lxml").get_text()
 
+def resolve_hostname(hostname: str) -> typing.List[str]:
+    """Return every address `hostname` resolves to ([] on failure)."""
+    try:
+        addresses = socket.getaddrinfo(hostname, None, 0, socket.SOCK_STREAM)
+    except (OSError, UnicodeError):
+        # socket.gaierror (an OSError) means the name did not resolve;
+        # UnicodeError means it could not be IDNA-encoded for the lookup.
+        return []
+    # each entry ends with its sockaddr, whose first item is the address
+    return [address[-1][0] for address in addresses]
+
+def is_ip(addr: str) -> bool:
+    """Return whether `addr` is a literal IPv4 or IPv6 address."""
+    try:
+        ipaddress.ip_address(addr)
+    except ValueError:
+        return False
+    return True
+
+def _parse_ip(addr: str) -> typing.Union[
+        ipaddress.IPv4Address, ipaddress.IPv6Address]:
+    """Parse `addr` into a form that compares equal across families.
+
+    Any "%scope" suffix is dropped and IPv4-mapped IPv6 addresses are
+    unwrapped to the IPv4 address they carry.
+    """
+    ip = ipaddress.ip_address(addr.split("%", 1)[0])
+    mapped = getattr(ip, "ipv4_mapped", None)
+    return ip if mapped is None else mapped
+
+def is_localhost(hostname: str) -> bool:
+    """Return whether `hostname` is, or resolves to, this machine.
+
+    A hostname that is empty or does not resolve is reported as not local.
+    """
+    if not hostname:
+        return False
+
+    if is_ip(hostname):
+        ips = {_parse_ip(hostname)}
+    else:
+        ips = {_parse_ip(ip) for ip in resolve_hostname(hostname)}
+
+    # loopback addresses are local even when no interface lists them, and
+    # the unspecified address is routed to the local host on common stacks
+    if any(ip.is_loopback or ip.is_unspecified for ip in ips):
+        return True
+
+    for interface in netifaces.interfaces():
+        links = netifaces.ifaddresses(interface)
+        # an interface may have no IPv4 and/or no IPv6 addresses at all
+        for family in (netifaces.AF_INET, netifaces.AF_INET6):
+            for link in links.get(family, []):
+                if _parse_ip(link["addr"]) in ips:
+                    return True
+    return False