Refuse to get the title for any url that points locally

This commit is contained in:
jesopo 2019-04-25 15:58:58 +01:00
parent 9ac7ead57e
commit 7643a962bd
2 changed files with 39 additions and 3 deletions

View file

@ -1,4 +1,4 @@
import hashlib, re
import hashlib, re, urllib.parse
from src import EventManager, ModuleManager, utils
@utils.export("channelset", {"setting": "auto-title",
@ -14,6 +14,14 @@ class Module(ModuleManager.BaseModule):
).hexdigest()
def _get_title(self, url):
if not urllib.parse.urlparse(url).scheme:
url = "http://%s" % url
hostname = urllib.parse.urlparse(url).hostname
if utils.http.is_localhost(hostname):
self.log.warn("tried to get title of localhost: %s", [url])
return None
try:
page = utils.http.request(url, soup=True)
except utils.http.HTTPWrongContentTypeException:

View file

@ -1,6 +1,7 @@
import re, signal, traceback, typing, urllib.error, urllib.parse
import ipaddress, re, signal, socket, traceback, typing
import urllib.error, urllib.parse
import json as _json
import bs4, requests
import bs4, netifaces, requests
from src import utils
# Matches an http:// or https:// URL up to the next whitespace character.
# Raw string so that ``\S`` reaches the regex engine instead of being treated
# as an (invalid) Python string escape.
REGEX_URL = re.compile(r"https?://\S+", re.I)
@ -87,3 +88,30 @@ def request(url: str, method: str="GET", get_params: dict={},
def strip_html(s: str) -> str:
    """Return the text content of the HTML string *s*, with markup removed."""
    soup = bs4.BeautifulSoup(s, "lxml")
    return soup.get_text()
def resolve_hostname(hostname: str) -> typing.List[str]:
    """Resolve *hostname* to the list of IP address strings it maps to.

    Resolution is best-effort: if the lookup fails (unknown host, resolver
    error, or a name that cannot be IDNA-encoded) an empty list is returned
    instead of raising.
    """
    try:
        addresses = socket.getaddrinfo(hostname, None, 0, socket.SOCK_STREAM)
    except (OSError, ValueError):
        # OSError covers socket.gaierror; ValueError covers the UnicodeError
        # raised for names with empty/over-long labels. Anything else (e.g.
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return []
    # Each entry is (family, type, proto, canonname, sockaddr); the first
    # element of sockaddr is the textual address for both IPv4 and IPv6.
    return [address[-1][0] for address in addresses]
def is_ip(addr: str) -> bool:
    """Tell whether *addr* parses as an IPv4 or IPv6 address literal."""
    try:
        ipaddress.ip_address(addr)
    except ValueError:
        # ip_address() rejects anything that is not a valid address.
        return False
    else:
        return True
def is_localhost(hostname: str) -> bool:
    """Return True if *hostname* refers to the machine this code runs on.

    *hostname* may be an IP literal or a name; names are resolved with
    ``resolve_hostname``. The host counts as local when any of its addresses
    is a loopback address, the unspecified address, or an address assigned
    to one of this machine's network interfaces. A name that does not
    resolve at all is reported as not local.
    """
    if is_ip(hostname):
        candidates = [hostname]
    else:
        candidates = resolve_hostname(hostname)

    # Drop any IPv6 zone id ("fe80::1%eth0") so that addresses compare equal
    # to the zone-less interface addresses built below.
    ips = {ipaddress.ip_address(ip.split("%", 1)[0]) for ip in candidates}

    # Also consider the IPv4 form of IPv4-mapped IPv6 addresses
    # (e.g. ::ffff:127.0.0.1), which would otherwise slip through.
    for ip in list(ips):
        mapped = getattr(ip, "ipv4_mapped", None)
        if mapped is not None:
            ips.add(mapped)

    if not ips:
        return False

    # The whole of 127.0.0.0/8 and ::1 are local even though only one of
    # them is normally listed on an interface; 0.0.0.0 / :: are treated as
    # local too since connecting to them typically reaches this host.
    if any(ip.is_loopback or ip.is_unspecified for ip in ips):
        return True

    for interface in netifaces.interfaces():
        links = netifaces.ifaddresses(interface)
        # An interface only has keys for the address families it actually
        # carries, so a missing family must not raise KeyError.
        for family in (netifaces.AF_INET, netifaces.AF_INET6):
            for link in links.get(family, []):
                address = ipaddress.ip_address(link["addr"].split("%", 1)[0])
                if address in ips:
                    return True
    return False