bitbot-3.11-fork/src/utils/http.py

import re, traceback, urllib.error, urllib.parse
import json as _json
import bs4, requests

# Browser-style User-Agent string; get_url sends it as the "User-Agent"
# header whenever the caller has not supplied one.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/49.0.2623.87 Safari/537.36"
)

# Case-insensitive pattern matching an "http://" or "https://" prefix.
REGEX_HTTP = re.compile("https?://", flags=re.IGNORECASE)

def get_url(url, method="GET", get_params=None, post_data=None, headers=None,
        json_data=None, code=False, json=False, soup=False, parser="lxml"):
    """Perform an HTTP request and return the response body.

    Parameters:
        url: address to request; "http://" is prepended when urlparse finds
            no scheme in it.
        method: HTTP verb, upper-cased before being handed to requests.
        get_params: mapping sent as the query string (defaults to empty).
        post_data: passed to requests as the `data` argument.
        headers: extra request headers. "Accept-Language" ("en-GB") and
            "User-Agent" (USER_AGENT) are filled in when absent. The
            caller's mapping is copied, never modified.
        json_data: passed to requests as the `json` argument.
        code: when true, return a `(status_code, body)` tuple instead of
            just the body.
        json: when true and the body is non-empty, decode the body as JSON.
        soup: when true, return the body parsed by BeautifulSoup; this takes
            precedence over `json`.
        parser: BeautifulSoup parser name used when `soup` is true.

    Returns:
        The response text, the decoded JSON value, or a BeautifulSoup
        object, optionally paired with the HTTP status code (see `code`).
        If JSON decoding fails the traceback is printed and False is
        returned (or `(0, False)` when `code` is true).
    """
    if not urllib.parse.urlparse(url).scheme:
        url = "http://%s" % url

    # Copy so that neither a shared default nor the caller's own dict is
    # mutated when the fallback headers are filled in below.
    headers = {} if headers is None else dict(headers)
    headers.setdefault("Accept-Language", "en-GB")
    headers.setdefault("User-Agent", USER_AGENT)

    response = requests.request(
        method.upper(),
        url,
        headers=headers,
        params={} if get_params is None else get_params,
        data=post_data,
        json=json_data
    )

    if soup:
        parsed = bs4.BeautifulSoup(response.text, parser)
        if code:
            # requests exposes the HTTP status as `status_code`; Response
            # objects have no `code` attribute.
            return response.status_code, parsed
        return parsed

    data = response.text
    # An empty body is returned as-is rather than fed to the JSON decoder.
    if json and data:
        try:
            data = _json.loads(data)
        except _json.decoder.JSONDecodeError:
            traceback.print_exc()
            if code:
                return 0, False
            return False

    if code:
        return response.status_code, data
    return data

def strip_html(s):
    """Return the text content of the HTML string `s`.

    The input is parsed by BeautifulSoup with the "lxml" parser and the
    result of its get_text() is returned.
    """
    document = bs4.BeautifulSoup(s, "lxml")
    return document.get_text()
Change utils.http to use requests 2018-10-10 12:41:58 +00:00			`import re, traceback, urllib.error, urllib.parse`
			`import json as _json`
			`import bs4, requests`
Move src/Utils.py in to src/utils/, splitting functionality out in to modules of related functionality 2018-10-03 12:22:37 +00:00
			`USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "`
			`"(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36")`
			`REGEX_HTTP = re.compile("https?://", re.I)`

Change utils.http to use requests 2018-10-10 12:41:58 +00:00			`def get_url(url, method="GET", get_params={}, post_data=None, headers={},`
			`json_data=None, code=False, json=False, soup=False, parser="lxml"):`

Move src/Utils.py in to src/utils/, splitting functionality out in to modules of related functionality 2018-10-03 12:22:37 +00:00			`if not urllib.parse.urlparse(url).scheme:`
			`url = "http://%s" % url`

Change utils.http to use requests 2018-10-10 12:41:58 +00:00			`if not "Accept-Language" in headers:`
			`headers["Accept-Language"] = "en-GB"`
			`if not "User-Agent" in headers:`
			`headers["User-Agent"] = USER_AGENT`

			`response = requests.request(`
			`method.upper(),`
			`url,`
			`headers=headers,`
			`params=get_params,`
			`data=post_data,`
			`json=json_data`
			`)`

			`if soup:`
			`soup = bs4.BeautifulSoup(response.text, parser)`
			`if code:`
Return response code from utils.http.get_url when code=True and soup=True 2018-10-09 21:16:04 +00:00			`return response.code, soup`
			`return soup`

Change utils.http to use requests 2018-10-10 12:41:58 +00:00			`data = response.text`
			`if json and data:`
Move src/Utils.py in to src/utils/, splitting functionality out in to modules of related functionality 2018-10-03 12:22:37 +00:00			`try:`
Change utils.http to use requests 2018-10-10 12:41:58 +00:00			`data = _json.loads(data)`
			`except _json.decoder.JSONDecodeError:`
Move src/Utils.py in to src/utils/, splitting functionality out in to modules of related functionality 2018-10-03 12:22:37 +00:00			`traceback.print_exc()`
Change utils.http to use requests 2018-10-10 12:41:58 +00:00			`if code:`
			`return 0, False`
Move src/Utils.py in to src/utils/, splitting functionality out in to modules of related functionality 2018-10-03 12:22:37 +00:00			`return False`
Change utils.http to use requests 2018-10-10 12:41:58 +00:00
			`if code:`
			`return response.status_code, data`
Move src/Utils.py in to src/utils/, splitting functionality out in to modules of related functionality 2018-10-03 12:22:37 +00:00			`else:`
			`return data`

			`def strip_html(s):`
			`return bs4.BeautifulSoup(s, "lxml").get_text()`