Don't run the similarity test on empty <title>s

This commit is contained in:
jesopo 2020-02-12 11:35:40 +00:00
parent cc7c66883d
commit a5acf0e3aa

View file

@ -29,16 +29,17 @@ class Module(ModuleManager.BaseModule):
title_word = title_word.lower() title_word = title_word.lower()
title_words.append(title_word.strip("'\"<>()")) title_words.append(title_word.strip("'\"<>()"))
present = 0 if title_words:
for title_word in title_words: present = 0
if title_word in url: for title_word in title_words:
present += 1 if title_word in url:
present += 1
similarity = present/len(title_words) similarity = present/len(title_words)
# if at least 80% of words are in the URL, too similar # less than 80% similar, proceed
if similarity >= 0.8: if similarity < 0.8:
return False return True
return True return False
def _get_title(self, server, channel, url): def _get_title(self, server, channel, url):
if not urllib.parse.urlparse(url).scheme: if not urllib.parse.urlparse(url).scheme: