don't auto-title when a URL contains most of its <title>
This commit is contained in:
parent
3466a3c43e
commit
8f927afdc9
1 changed files with 21 additions and 0 deletions
|
@ -11,11 +11,27 @@ from src import EventManager, ModuleManager, utils
|
||||||
"Enable/disable shortening URLs when getting their title"))
|
"Enable/disable shortening URLs when getting their title"))
|
||||||
@utils.export("channelset", utils.BoolSetting("auto-title-first",
|
@utils.export("channelset", utils.BoolSetting("auto-title-first",
|
||||||
"Enable/disable showing who first posted a URL that was auto-titled"))
|
"Enable/disable showing who first posted a URL that was auto-titled"))
|
||||||
|
@utils.export("channelset", utils.BoolSetting("auto-title-difference",
|
||||||
|
"Enable/disable checking if a <title> is different enough from the URL"
|
||||||
|
" before showing it"))
|
||||||
class Module(ModuleManager.BaseModule):
|
class Module(ModuleManager.BaseModule):
|
||||||
# _url_hash: build a storage key for a URL. The URL is lowercased, encoded as
# UTF-8 and hashed with SHA-256; the result is the hex digest prefixed with
# "sha256:". Because the whole URL is lowercased first, URLs that differ only
# in letter case produce the same key.
def _url_hash(self, url):
|
def _url_hash(self, url):
|
||||||
return "sha256:%s" % hashlib.sha256(url.lower().encode("utf8")
|
return "sha256:%s" % hashlib.sha256(url.lower().encode("utf8")
|
||||||
).hexdigest()
|
).hexdigest()
|
||||||
|
|
||||||
|
def _different(self, url, title):
|
||||||
|
url = url.lower()
|
||||||
|
title_words = [word.lower() for word in title.split()]
|
||||||
|
present = 0
|
||||||
|
for title_word in title_words:
|
||||||
|
if title_word in url:
|
||||||
|
present += 1
|
||||||
|
|
||||||
|
# if at least 80% of words are in the URL, too similar
|
||||||
|
if (present/len(title_words)) >= 0.8:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
def _get_title(self, server, channel, url):
|
def _get_title(self, server, channel, url):
|
||||||
if not urllib.parse.urlparse(url).scheme:
|
if not urllib.parse.urlparse(url).scheme:
|
||||||
url = "http://%s" % url
|
url = "http://%s" % url
|
||||||
|
@ -35,6 +51,9 @@ class Module(ModuleManager.BaseModule):
|
||||||
if page.data.title:
|
if page.data.title:
|
||||||
title = page.data.title.text.replace("\n", " ").replace(
|
title = page.data.title.text.replace("\n", " ").replace(
|
||||||
"\r", "").replace(" ", " ").strip()
|
"\r", "").replace(" ", " ").strip()
|
||||||
|
if (channel.get_setting("auto-title-difference", True) and
|
||||||
|
not self._different(url, title)):
|
||||||
|
return -2, title
|
||||||
|
|
||||||
if channel.get_setting("title-shorten", False):
|
if channel.get_setting("title-shorten", False):
|
||||||
short_url = self.exports.get_one("shorturl")(server, url,
|
short_url = self.exports.get_one("shorturl")(server, url,
|
||||||
|
@ -72,6 +91,8 @@ class Module(ModuleManager.BaseModule):
|
||||||
[event["user"].nickname, utils.iso8601_format_now(),
|
[event["user"].nickname, utils.iso8601_format_now(),
|
||||||
url])
|
url])
|
||||||
event["stdout"].write(message)
|
event["stdout"].write(message)
|
||||||
|
if code == -2:
|
||||||
|
self.log.debug("Not showing title for %s, too similar", [url])
|
||||||
|
|
||||||
@utils.hook("received.command.t", alias_of="title")
|
@utils.hook("received.command.t", alias_of="title")
|
||||||
@utils.hook("received.command.title", usage="[URL]")
|
@utils.hook("received.command.title", usage="[URL]")
|
||||||
|
|
Loading…
Reference in a new issue