don't similarity test empty <title>s
This commit is contained in:
parent
cc7c66883d
commit
a5acf0e3aa
1 changed file with 10 additions and 9 deletions
|
@@ -29,16 +29,17 @@ class Module(ModuleManager.BaseModule):
|
||||||
title_word = title_word.lower()
|
title_word = title_word.lower()
|
||||||
title_words.append(title_word.strip("'\"<>()"))
|
title_words.append(title_word.strip("'\"<>()"))
|
||||||
|
|
||||||
present = 0
|
if title_words:
|
||||||
for title_word in title_words:
|
present = 0
|
||||||
if title_word in url:
|
for title_word in title_words:
|
||||||
present += 1
|
if title_word in url:
|
||||||
|
present += 1
|
||||||
|
|
||||||
similarity = present/len(title_words)
|
similarity = present/len(title_words)
|
||||||
# if at least 80% of words are in the URL, too similar
|
# less than 80% similar, proceed
|
||||||
if similarity >= 0.8:
|
if similarity < 0.8:
|
||||||
return False
|
return True
|
||||||
return True
|
return False
|
||||||
|
|
||||||
def _get_title(self, server, channel, url):
|
def _get_title(self, server, channel, url):
|
||||||
if not urllib.parse.urlparse(url).scheme:
|
if not urllib.parse.urlparse(url).scheme:
|
||||||
|
|
Loading…
Reference in a new issue