2016-03-29 15:21:27 +00:00
|
|
|
import json, re, traceback, urllib.request, urllib.parse, urllib.error
|
2016-03-29 11:56:58 +00:00
|
|
|
import bs4
|
|
|
|
|
|
|
|
# Browser-style User-Agent string; get_url() sends it with each request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36"
)

# Matches an "http://" or "https://" prefix, ignoring case.
REGEX_HTTP = re.compile("https?://", flags=re.IGNORECASE)
|
|
|
|
|
|
|
|
def remove_colon(s):
    """Return *s* without its first character when that character is ":".

    At most one leading colon is dropped; a string that does not start
    with ":" is returned unchanged.
    """
    return s[1:] if s.startswith(":") else s
|
|
|
|
|
|
|
|
def arbitrary(s, n):
    """Join the items of *s* from index *n* onwards with single spaces.

    The joined text is passed through remove_colon() before it is
    returned, so one leading ":" (if any) is dropped.
    """
    trailing = s[n:]
    joined = " ".join(trailing)
    return remove_colon(joined)
|
|
|
|
|
|
|
|
def seperate_hostmask(hostmask):
    """Split a ``nick!user@host`` hostmask into its three components.

    A single leading ":" is removed first (via remove_colon()).

    Returns a ``(nickname, username, hostname)`` tuple. When the text does
    not contain a "!" followed later by an "@", all three elements are the
    (colon-stripped) input itself.
    """
    hostmask = remove_colon(hostmask)
    first_delim = hostmask.find("!")
    second_delim = hostmask.find("@")

    # Fallback for anything that is not a full nick!user@host mask.
    nickname = username = hostname = hostmask
    if first_delim > -1 and second_delim > first_delim:
        nickname, remainder = hostmask.split("!", 1)
        # Split only what follows the "!"; splitting the whole mask again
        # would leave "nick!" glued to the front of the username.
        username, hostname = remainder.split("@", 1)
    return nickname, username, hostname
|
|
|
|
|
|
|
|
def _charset_from_markup(soup):
    """Guess a charset name from a parsed document's <meta> tags or doctype.

    Returns the charset string, or None when neither source yields one.
    """
    encoding = None
    for meta in soup.find_all("meta"):
        if "charset=" in meta.get("content", ""):
            # e.g. content="text/html; charset=utf-8"
            encoding = meta.get("content").split("charset=", 1)[1].split(
                ";", 1)[0]
            break
        if meta.get("charset", ""):
            encoding = meta.get("charset")
            break
    if not encoding:
        for item in soup.contents:
            if isinstance(item, bs4.Doctype):
                # A bare "html" doctype is taken to mean utf8; any other
                # doctype falls back to latin-1.
                encoding = "utf8" if item == "html" else "latin-1"
                break
    return encoding


def get_url(url, **kwargs):
    """Fetch *url* and return its body, optionally parsed.

    "http://" is prepended when *url* has no scheme.

    Keyword arguments:
        method: HTTP method to use (default "GET").
        get_params: mapping/sequence urlencoded into a "?query" suffix.
        post_params: mapping/sequence urlencoded and sent as the body.
        headers: dict of extra request headers.
        code: when truthy, return ``(status_code, data)`` instead of data.
        soup: when truthy, return a BeautifulSoup ("lxml") document built
            from the raw body (returned on its own, even with ``code``).
        json: when truthy, decode the body as JSON.

    Returns the decoded body text (or parsed JSON / soup). On an HTTP
    error, a JSON decode error, or a URL that cannot be encoded as
    latin-1, returns ``False`` -- or ``(status_code, False)`` when ``code``
    is set (status 0 for the un-encodable URL).
    """
    want_code = kwargs.get("code")

    if not urllib.parse.urlparse(url).scheme:
        url = "http://%s" % url

    method = kwargs.get("method", "GET")
    get_params = kwargs.get("get_params", "")
    post_params = kwargs.get("post_params", None)
    headers = kwargs.get("headers", {})

    # Normalise falsy values so that e.g. get_params={} does not end up
    # formatted into the URL as the literal text "{}".
    if get_params:
        query = "?%s" % urllib.parse.urlencode(get_params)
    else:
        query = ""
    if post_params:
        post_params = urllib.parse.urlencode(post_params).encode("utf8")
    else:
        post_params = None
    url = "%s%s" % (url, query)

    try:
        # Reject URLs containing characters outside latin-1 up front.
        url.encode("latin-1")
    except UnicodeEncodeError:
        return (0, False) if want_code else False

    request = urllib.request.Request(url, post_params)
    request.add_header("Accept-Language", "en-US")
    request.add_header("User-Agent", USER_AGENT)
    for header, value in headers.items():
        request.add_header(header, value)
    request.method = method

    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        traceback.print_exc()
        return (e.code, False) if want_code else False

    # Pull everything needed off the response, then close the connection.
    with response:
        response_content = response.read()
        encoding = response.info().get_content_charset()
        status = response.code

    if kwargs.get("soup"):
        return bs4.BeautifulSoup(response_content, "lxml")

    if not encoding:
        # No charset in the Content-Type header: look inside the markup.
        encoding = _charset_from_markup(
            bs4.BeautifulSoup(response_content, "lxml"))

    try:
        data = response_content.decode(encoding or "utf8")
    except LookupError:
        # The document named a charset Python does not know about.
        data = response_content.decode("utf8")

    if kwargs.get("json"):
        try:
            data = json.loads(data)
        except json.decoder.JSONDecodeError:
            traceback.print_exc()
            # Keep the return shape consistent with the other error paths
            # so callers unpacking ``code, data`` do not break.
            return (status, False) if want_code else False

    return (status, data) if want_code else data
|
|
|
|
|
|
|
|
# Numeric colour codes, intended as arguments to color().
# NOTE(review): values look like the mIRC colour palette -- confirm.
COLOR_WHITE, COLOR_BLACK, COLOR_BLUE, COLOR_GREEN = 0, 1, 2, 3
COLOR_RED, COLOR_BROWN, COLOR_PURPLE, COLOR_ORANGE = 4, 5, 6, 7
COLOR_YELLOW, COLOR_LIGHTGREEN, COLOR_CYAN, COLOR_LIGHTCYAN = (8, 9,
    10, 11)
COLOR_LIGHTBLUE, COLOR_PINK, COLOR_GREY, COLOR_LIGHTGREY = (12, 13,
    14, 15)

# Single control characters used as text-formatting markers.
FONT_BOLD, FONT_ITALIC, FONT_UNDERLINE, FONT_INVERT = ("\x02", "\x1D",
    "\x1F", "\x16")
FONT_COLOR, FONT_RESET = "\x03", "\x0F"


def color(foreground, background=None):
    """Build a colour control sequence.

    The result is FONT_COLOR followed by the zero-padded two-digit
    *foreground* code and, when *background* is given, a comma and the
    zero-padded two-digit *background* code.

    *background* is only treated as absent when it is None, so that
    COLOR_WHITE (0) is a usable background.
    """
    foreground = str(foreground).zfill(2)
    if background is None:
        return "%s%s" % (FONT_COLOR, foreground)
    background = str(background).zfill(2)
    return "%s%s,%s" % (FONT_COLOR, foreground, background)
|
|
|
|
|
|
|
|
# Lengths of common time spans, expressed in seconds.
TIME_SECOND = 1
TIME_MINUTE = TIME_SECOND*60
TIME_HOUR = TIME_MINUTE*60
TIME_DAY = TIME_HOUR*24
TIME_WEEK = TIME_DAY*7


def time_unit(seconds):
    """Express *seconds* in the largest unit that fits at least once.

    Units tried, largest first: week, day, hour, minute; anything below a
    minute is reported in seconds. The amount is truncated to an int.

    Returns a two-item list ``[amount, unit_name]``. The unit name is
    pluralised unless the amount is exactly one (so zero gives "seconds").
    """
    spans = (
        (TIME_WEEK, "week"),
        (TIME_DAY, "day"),
        (TIME_HOUR, "hour"),
        (TIME_MINUTE, "minute"),
    )
    for span, name in spans:
        if seconds >= span:
            since, unit = seconds/span, name
            break
    else:
        since, unit = seconds, "second"

    since = int(since)
    if abs(since) != 1:
        unit = "%ss" % unit  # pluralise the unit
    return [since, unit]
|
|
|
|
|
|
|
|
# One "<digits><unit letter>" token, e.g. "10m"; unit letters are
# case-insensitive. Raw string so that "\d" is not an invalid str escape.
REGEX_PRETTYTIME = re.compile(r"\d+[wdhms]", re.I)

# Number of seconds in one minute / hour / day / week.
SECONDS_MINUTES = 60
SECONDS_HOURS = SECONDS_MINUTES*60
SECONDS_DAYS = SECONDS_HOURS*24
SECONDS_WEEKS = SECONDS_DAYS*7


def from_pretty_time(pretty_time):
    """Convert text such as "1h30m" into a total number of seconds.

    Every "<digits><unit>" token found in *pretty_time* is added up, where
    the unit is one of w, d, h, m or s (any case). Other text is ignored.

    Returns the total as an int, or None when the total is zero (which
    includes the case where no token was found).
    """
    multipliers = {
        "s": 1,
        "m": SECONDS_MINUTES,
        "h": SECONDS_HOURS,
        "d": SECONDS_DAYS,
        "w": SECONDS_WEEKS,
    }
    seconds = 0
    for match in REGEX_PRETTYTIME.findall(pretty_time):
        number, unit = int(match[:-1]), match[-1].lower()
        seconds += number*multipliers[unit]
    if seconds > 0:
        return seconds
    return None
|