From 7588de5141624221dfde33b9f3e6dbd75ef31083 Mon Sep 17 00:00:00 2001 From: terminaldweller Date: Fri, 6 Aug 2021 11:02:10 +0430 Subject: added typehints and comments --- main.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index c72b674..ef89ca1 100755 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ import logging from newspaper import Article, build, Config from bs4 import BeautifulSoup from contextlib import closing -from requests import get +from requests import get, Response from requests.exceptions import RequestException from re import findall from readability import Document @@ -26,17 +26,20 @@ class Argparser(object): # TODO-maybe actually really do some logging -def logError(err): +def logError(err: RequestException) -> None: + """logs the errors""" logging.exception(err) -def isAGoodResponse(resp): +def isAGoodResponse(resp: Response) -> bool: + """checks whether the get we sent got a 200 response""" content_type = resp.headers['Content-Type'].lower() return (resp.status_code == 200 and content_type is not None and content_type.find("html") > -1) -def simpleGet(url): +def simpleGet(url: str) -> bytes: + """issues a simple get request to download a website""" try: with closing(get(url, stream=True)) as resp: if isAGoodResponse(resp): @@ -48,7 +51,8 @@ def simpleGet(url): return None -def getURLS(source): +def getURLS(source: str) -> dict: + """extracts the urls from a website""" result = dict() raw_ml = simpleGet(source) ml = BeautifulSoup(raw_ml, "lxml") @@ -69,14 +73,15 @@ def getURLS(source): return result -def configNews(config): +def configNews(config: Config) -> None: + """configures newspaper""" config.fetch_images = False config.keep_article_html = True config.memoize_articles = False config.browser_user_agent = "Chrome/91.0.4464.5" -def main(): +def main() -> None: argparser = Argparser() config = Config() configNews(config) -- cgit v1.2.3