don't similarity test empty <title>s
This commit is contained in:
parent
cc7c66883d
commit
a5acf0e3aa
1 changed file with 10 additions and 9 deletions
|
@@ -29,16 +29,17 @@ class Module(ModuleManager.BaseModule):
|
|||
title_word = title_word.lower()
|
||||
title_words.append(title_word.strip("'\"<>()"))
|
||||
|
||||
present = 0
|
||||
for title_word in title_words:
|
||||
if title_word in url:
|
||||
present += 1
|
||||
if title_words:
|
||||
present = 0
|
||||
for title_word in title_words:
|
||||
if title_word in url:
|
||||
present += 1
|
||||
|
||||
similarity = present/len(title_words)
|
||||
# if at least 80% of words are in the URL, too similar
|
||||
if similarity >= 0.8:
|
||||
return False
|
||||
return True
|
||||
similarity = present/len(title_words)
|
||||
# less than 80% similar, proceed
|
||||
if similarity < 0.8:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _get_title(self, server, channel, url):
|
||||
if not urllib.parse.urlparse(url).scheme:
|
||||
|
|
Loading…
Reference in a new issue