diff options
Diffstat (limited to '')
| -rwxr-xr-x | main.py | 19 | 
1 file changed, 12 insertions, 7 deletions
@@ -6,7 +6,7 @@ import logging
 from newspaper import Article, build, Config
 from bs4 import BeautifulSoup
 from contextlib import closing
-from requests import get
+from requests import get, Response
 from requests.exceptions import RequestException
 from re import findall
 from readability import Document
@@ -26,17 +26,20 @@ class Argparser(object):
 
 
 # TODO-maybe actually really do some logging
-def logError(err):
+def logError(err: RequestException) -> None:
+    """logs the errors"""
     logging.exception(err)
 
 
-def isAGoodResponse(resp):
+def isAGoodResponse(resp: Response) -> bool:
+    """checks whether the get we sent got a 200 response"""
     content_type = resp.headers['Content-Type'].lower()
     return (resp.status_code == 200 and
             content_type is not None and content_type.find("html") > -1)
 
 
-def simpleGet(url):
+def simpleGet(url: str) -> bytes:
+    """issues a simple get request to download a website"""
     try:
         with closing(get(url, stream=True)) as resp:
             if isAGoodResponse(resp):
@@ -48,7 +51,8 @@ def simpleGet(url):
         return None
 
 
-def getURLS(source):
+def getURLS(source: str) -> dict:
+    """extracts the urls from a website"""
     result = dict()
     raw_ml = simpleGet(source)
     ml = BeautifulSoup(raw_ml, "lxml")
@@ -69,14 +73,15 @@ def getURLS(source):
     return result
 
 
-def configNews(config):
+def configNews(config: Config) -> None:
+    """configures newspaper"""
     config.fetch_images = False
     config.keep_article_html = True
     config.memoize_articles = False
     config.browser_user_agent = "Chrome/91.0.4464.5"
 
 
-def main():
+def main() -> None:
     argparser = Argparser()
     config = Config()
     configNews(config)
