aboutsummaryrefslogtreecommitdiffstats
path: root/main.py
diff options
context:
space:
mode:
author terminaldweller <thabogre@gmail.com> 2021-07-29 16:58:24 +0000
committer terminaldweller <thabogre@gmail.com> 2021-07-29 16:58:24 +0000
commit 5f67821f49a8e3c4573c4d3a8af977ff61dd51f3 (patch)
tree 8205ae4a8e527d0d0e34a0821471f106ab7fc4a6 /main.py
parent no need for externally getting the links. devourer can do that on its own now... (diff)
download devourer-5f67821f49a8e3c4573c4d3a8af977ff61dd51f3.tar.gz
devourer-5f67821f49a8e3c4573c4d3a8af977ff61dd51f3.zip
added config for newspaper. added gitpod configs.
Diffstat (limited to '')
-rwxr-xr-x main.py 17
1 files changed, 12 insertions, 5 deletions
diff --git a/main.py b/main.py
index 00ce6d1..e5794e1 100755
--- a/main.py
+++ b/main.py
@@ -3,7 +3,7 @@
import argparse
import logging
-from newspaper import Article, build
+from newspaper import Article, build, Config
from bs4 import BeautifulSoup
from contextlib import closing
from requests import get
@@ -24,7 +24,7 @@ class Argparser(object):
# TODO-maybe actually really do some logging
def logError(err):
- print(err)
+ logging.exception(err)
def isAGoodResponse(resp):
@@ -66,12 +66,18 @@ def getURLS(source):
return result
+def configNews(config):
+ config.fetch_images = False
+ config.keep_article_html = True
+ config.memoize_articles = False
+ config.browser_user_agent = "Chrome/91.0.4464.5"
+
+
def main():
argparser = Argparser()
+ config = Config()
+ configNews(config)
urls = getURLS(argparser.args.source)
- # import sys
- # print(urls)
- # sys.exit(0)
for url in urls:
parser = build(url)
for article in parser.articles:
@@ -79,6 +85,7 @@ def main():
try:
a.download()
a.parse()
+ # print(a.html)
print(a.text)
except Exception as e:
logging.exception(e)