From 5f67821f49a8e3c4573c4d3a8af977ff61dd51f3 Mon Sep 17 00:00:00 2001 From: terminaldweller Date: Thu, 29 Jul 2021 21:28:24 +0430 Subject: added config for newspaper. added gitpod configs. --- main.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'main.py') diff --git a/main.py b/main.py index 00ce6d1..e5794e1 100755 --- a/main.py +++ b/main.py @@ -3,7 +3,7 @@ import argparse import logging -from newspaper import Article, build +from newspaper import Article, build, Config from bs4 import BeautifulSoup from contextlib import closing from requests import get @@ -24,7 +24,7 @@ class Argparser(object): # TODO-maybe actually really do some logging def logError(err): - print(err) + logging.exception(err) def isAGoodResponse(resp): @@ -66,12 +66,18 @@ def getURLS(source): return result +def configNews(config): + config.fetch_images = False + config.keep_article_html = True + config.memoize_articles = False + config.browser_user_agent = "Chrome/91.0.4464.5" + + def main(): argparser = Argparser() + config = Config() + configNews(config) urls = getURLS(argparser.args.source) - # import sys - # print(urls) - # sys.exit(0) for url in urls: parser = build(url) for article in parser.articles: @@ -79,6 +85,7 @@ def main(): try: a.download() a.parse() + # print(a.html) print(a.text) except Exception as e: logging.exception(e) -- cgit v1.2.3