diff options
Diffstat (limited to '')
-rwxr-xr-x | cargo/cargo.py | 2 | ||||
-rw-r--r-- | cargo/poetry.lock | 50 | ||||
-rw-r--r-- | devourer/devourer.py | 38 |
3 files changed, 46 insertions, 44 deletions
diff --git a/cargo/cargo.py b/cargo/cargo.py index a5144dd..7957cd9 100755 --- a/cargo/cargo.py +++ b/cargo/cargo.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 +"""Cargo is meant to server as a file/server downloader service.""" import http.server import os import socketserver + import huggingface_hub as hh diff --git a/cargo/poetry.lock b/cargo/poetry.lock index a5020aa..a4e26f0 100644 --- a/cargo/poetry.lock +++ b/cargo/poetry.lock @@ -1,25 +1,25 @@ [[package]] name = "certifi" -version = "2022.5.18" +version = "2022.6.15" description = "Python package for providing Mozilla's CA Bundle." category = "main" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" [[package]] name = "charset-normalizer" -version = "2.0.12" +version = "2.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." category = "main" optional = false -python-versions = ">=3.5.0" +python-versions = ">=3.6.0" [package.extras] unicode_backport = ["unicodedata2"] [[package]] name = "colorama" -version = "0.4.4" +version = "0.4.5" description = "Cross-platform colored terminal text." category = "main" optional = false @@ -27,7 +27,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [[package]] name = "filelock" -version = "3.7.0" +version = "3.7.1" description = "A platform independent file lock." category = "main" optional = false @@ -102,21 +102,21 @@ python-versions = ">=3.6" [[package]] name = "requests" -version = "2.27.1" +version = "2.28.1" description = "Python HTTP for Humans." category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.7, <4" [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} -idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" urllib3 = ">=1.21.1,<1.27" [package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "tqdm" @@ -137,7 +137,7 @@ telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.2.0" +version = "4.3.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false @@ -163,20 +163,20 @@ content-hash = "4873aecc7d8cca0e408b389e1b55748a66a9b4a8e6899aee321fab610098d5b8 [metadata.files] certifi = [ - {file = "certifi-2022.5.18-py3-none-any.whl", hash = "sha256:8d15a5a7fde18536a249c49e07e8e462b8fc13de21b3c80e8a68315dfa227c99"}, - {file = "certifi-2022.5.18.tar.gz", hash = "sha256:6ae10321df3e464305a46e997da41ea56c1d311fb9ff1dd4e04d6f14653ec63a"}, + {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, + {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, ] charset-normalizer = [ - {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, - {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, + {file = "charset-normalizer-2.1.0.tar.gz", hash = "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"}, + {file = "charset_normalizer-2.1.0-py3-none-any.whl", hash = "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5"}, ] colorama = [ - {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, - {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, ] filelock = [ - {file = "filelock-3.7.0-py3-none-any.whl", hash = "sha256:c7b5fdb219b398a5b28c8e4c1893ef5f98ece6a38c6ab2c22e26ec161556fed6"}, - {file = "filelock-3.7.0.tar.gz", hash = "sha256:b795f1b42a61bbf8ec7113c341dad679d772567b936fbd1bf43c9a238e673e20"}, + {file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"}, + {file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"}, ] huggingface-hub = [ {file = "huggingface_hub-0.6.0-py3-none-any.whl", hash = "sha256:585d72adade562a1f7038acf39eb7677b7649bdc0ce082b70f99e01164d9d8b5"}, @@ -230,16 +230,16 @@ pyyaml = [ {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, ] requests = [ - {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, - {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, ] tqdm = [ {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"}, {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"}, ] typing-extensions = [ - {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, - {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, + {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, + {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, ] urllib3 = [ {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, diff --git a/devourer/devourer.py b/devourer/devourer.py index 552c237..9587a1d 100644 --- a/devourer/devourer.py +++ b/devourer/devourer.py @@ -7,6 +7,7 @@ import logging import os import tempfile import typing + import fastapi import gtts # type:ignore import newspaper # type:ignore @@ -15,18 +16,15 @@ import rake_nltk # type:ignore import readability # type:ignore import refextract # type:ignore import requests # type:ignore - from tika import parser as tparser # type:ignore -# import random - -description = """ +MODULE_DESCRIPTION = """ Devourer is a lightweight knowledge aggregator.</br> Right now though, its more of a personal assistant. It cant extract text and summarize it and turn it into audio.<br/> """ -tags_metadata = [ +TAGS_METADATA = [ { "name": "/mila/pdf", "description": "The PDF endpoint. It accepts urls that contain a " @@ -34,8 +32,8 @@ tags_metadata = [ }, { "name": "/mila/reqs", - "description": "This endpoint accepts a link to a RFC and returns" - " the requirements it extracts from it.", + "description": "This endpoint accepts a link to a RFC and returns " + "the requirements it extracts from it.", }, { "name": "/mila/wiki", @@ -43,16 +41,16 @@ tags_metadata = [ }, { "name": "/mila/summ", - "description": "The summary endpoint accepts a url as input" - " that contains an html page. devourer extracts the" - " __important__ text out of the page and then will either" - " summarize and turn into audio.", + "description": "The summary endpoint accepts a url as input " + "that contains an html page. devourer extracts the " + "__important__ text out of the page and then will either " + "summarize and turn into audio.", }, {"name": "/mila/health", "description": "The health endpoint."}, ] -# FIXME-maybe actually really do some logging +# TODO-maybe actually really do some logging def log_error(err: str) -> None: """Logs the errors.""" logging.exception(err) @@ -99,8 +97,8 @@ def config_news(config: newspaper.Config) -> None: config.browser_user_agent = "Chrome/91.0.4464.5" -newspaper_config = newspaper.Config() -config_news(newspaper_config) +NEWSPAPER_CONFIG = newspaper.Config() +config_news(NEWSPAPER_CONFIG) def sanitize_text(text: str) -> str: @@ -113,6 +111,7 @@ def sanitize_text(text: str) -> str: def extract_requirements(text_body: str) -> list: """Extract the sentences containing the keywords that denote a requirement. + the keywords are baed on ISO/IEC directives, part 2: https://www.iso.org/sites/directives/current/part2/index.xhtml """ @@ -263,10 +262,10 @@ def get_requirements(url: str, sourcetype: str) -> list: results = [] try: if sourcetype == "html": - parser = newspaper.build(url, newspaper_config) + parser = newspaper.build(url, NEWSPAPER_CONFIG) for article in parser.articles: art = newspaper.Article( - config=newspaper_config, url=article.url + config=NEWSPAPER_CONFIG, url=article.url ) art.download() art.parse() @@ -285,7 +284,7 @@ def summarize_link_to_audio(url: str, summary: str) -> str: """Summarizes the text inside a given url into audio.""" result = str() try: - article = newspaper.Article(config=newspaper_config, url=url) + article = newspaper.Article(config=NEWSPAPER_CONFIG, url=url) article.download() article.parse() if summary == "newspaper": @@ -306,6 +305,7 @@ def summarize_link_to_audio(url: str, summary: str) -> str: def search_wikipedia(search_term: str, summary: str) -> str: """Search wikipedia for a string and return the url. + reference: https://www.mediawiki.org/wiki/API:Opensearch. """ result = str() @@ -343,7 +343,7 @@ def get_keywords_from_text(text: str) -> typing.List[str]: app = fastapi.FastAPI( title="Devourer", - description=description, + description=MODULE_DESCRIPTION, contact={ "name": "farzad sadeghi", "url": "https://github.com/terminaldweller/devourer", @@ -353,7 +353,7 @@ app = fastapi.FastAPI( "name": "GPL v3.0", "url": "https://www.gnu.org/licenses/gpl-3.0.en.html", }, - openapi_tags=tags_metadata, + openapi_tags=TAGS_METADATA, ) nltk.download("punkt") |