about summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-x cargo/cargo.py 2
-rw-r--r-- cargo/poetry.lock 50
-rw-r--r-- devourer/devourer.py 38
3 files changed, 46 insertions, 44 deletions
diff --git a/cargo/cargo.py b/cargo/cargo.py
index a5144dd..7957cd9 100755
--- a/cargo/cargo.py
+++ b/cargo/cargo.py
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
+"""Cargo is meant to serve as a file/server downloader service."""
import http.server
import os
import socketserver
+
import huggingface_hub as hh
diff --git a/cargo/poetry.lock b/cargo/poetry.lock
index a5020aa..a4e26f0 100644
--- a/cargo/poetry.lock
+++ b/cargo/poetry.lock
@@ -1,25 +1,25 @@
[[package]]
name = "certifi"
-version = "2022.5.18"
+version = "2022.6.15"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.6"
[[package]]
name = "charset-normalizer"
-version = "2.0.12"
+version = "2.1.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main"
optional = false
-python-versions = ">=3.5.0"
+python-versions = ">=3.6.0"
[package.extras]
unicode_backport = ["unicodedata2"]
[[package]]
name = "colorama"
-version = "0.4.4"
+version = "0.4.5"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
@@ -27,7 +27,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "filelock"
-version = "3.7.0"
+version = "3.7.1"
description = "A platform independent file lock."
category = "main"
optional = false
@@ -102,21 +102,21 @@ python-versions = ">=3.6"
[[package]]
name = "requests"
-version = "2.27.1"
+version = "2.28.1"
description = "Python HTTP for Humans."
category = "main"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+python-versions = ">=3.7, <4"
[package.dependencies]
certifi = ">=2017.4.17"
-charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
-idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
+charset-normalizer = ">=2,<3"
+idna = ">=2.5,<4"
urllib3 = ">=1.21.1,<1.27"
[package.extras]
-socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
-use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "tqdm"
@@ -137,7 +137,7 @@ telegram = ["requests"]
[[package]]
name = "typing-extensions"
-version = "4.2.0"
+version = "4.3.0"
description = "Backported and Experimental Type Hints for Python 3.7+"
category = "main"
optional = false
@@ -163,20 +163,20 @@ content-hash = "4873aecc7d8cca0e408b389e1b55748a66a9b4a8e6899aee321fab610098d5b8
[metadata.files]
certifi = [
- {file = "certifi-2022.5.18-py3-none-any.whl", hash = "sha256:8d15a5a7fde18536a249c49e07e8e462b8fc13de21b3c80e8a68315dfa227c99"},
- {file = "certifi-2022.5.18.tar.gz", hash = "sha256:6ae10321df3e464305a46e997da41ea56c1d311fb9ff1dd4e04d6f14653ec63a"},
+ {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"},
+ {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"},
]
charset-normalizer = [
- {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
- {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
+ {file = "charset-normalizer-2.1.0.tar.gz", hash = "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"},
+ {file = "charset_normalizer-2.1.0-py3-none-any.whl", hash = "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5"},
]
colorama = [
- {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
- {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
+ {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
+ {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
]
filelock = [
- {file = "filelock-3.7.0-py3-none-any.whl", hash = "sha256:c7b5fdb219b398a5b28c8e4c1893ef5f98ece6a38c6ab2c22e26ec161556fed6"},
- {file = "filelock-3.7.0.tar.gz", hash = "sha256:b795f1b42a61bbf8ec7113c341dad679d772567b936fbd1bf43c9a238e673e20"},
+ {file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"},
+ {file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"},
]
huggingface-hub = [
{file = "huggingface_hub-0.6.0-py3-none-any.whl", hash = "sha256:585d72adade562a1f7038acf39eb7677b7649bdc0ce082b70f99e01164d9d8b5"},
@@ -230,16 +230,16 @@ pyyaml = [
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
]
requests = [
- {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
- {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
+ {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"},
+ {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"},
]
tqdm = [
{file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"},
{file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"},
]
typing-extensions = [
- {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"},
- {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"},
+ {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"},
+ {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"},
]
urllib3 = [
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
diff --git a/devourer/devourer.py b/devourer/devourer.py
index 552c237..9587a1d 100644
--- a/devourer/devourer.py
+++ b/devourer/devourer.py
@@ -7,6 +7,7 @@ import logging
import os
import tempfile
import typing
+
import fastapi
import gtts # type:ignore
import newspaper # type:ignore
@@ -15,18 +16,15 @@ import rake_nltk # type:ignore
import readability # type:ignore
import refextract # type:ignore
import requests # type:ignore
-
from tika import parser as tparser # type:ignore
-# import random
-
-description = """
+MODULE_DESCRIPTION = """
Devourer is a lightweight knowledge aggregator.</br>
Right now though, its more of
a personal assistant. It cant extract text and summarize it and
turn it into audio.<br/>
"""
-tags_metadata = [
+TAGS_METADATA = [
{
"name": "/mila/pdf",
"description": "The PDF endpoint. It accepts urls that contain a "
@@ -34,8 +32,8 @@ tags_metadata = [
},
{
"name": "/mila/reqs",
- "description": "This endpoint accepts a link to a RFC and returns"
- " the requirements it extracts from it.",
+ "description": "This endpoint accepts a link to a RFC and returns "
+ "the requirements it extracts from it.",
},
{
"name": "/mila/wiki",
@@ -43,16 +41,16 @@ tags_metadata = [
},
{
"name": "/mila/summ",
- "description": "The summary endpoint accepts a url as input"
- " that contains an html page. devourer extracts the"
- " __important__ text out of the page and then will either"
- " summarize and turn into audio.",
+ "description": "The summary endpoint accepts a url as input "
+ "that contains an html page. devourer extracts the "
+ "__important__ text out of the page and then will either "
+ "summarize and turn into audio.",
},
{"name": "/mila/health", "description": "The health endpoint."},
]
-# FIXME-maybe actually really do some logging
+# TODO-maybe actually really do some logging
def log_error(err: str) -> None:
"""Logs the errors."""
logging.exception(err)
@@ -99,8 +97,8 @@ def config_news(config: newspaper.Config) -> None:
config.browser_user_agent = "Chrome/91.0.4464.5"
-newspaper_config = newspaper.Config()
-config_news(newspaper_config)
+NEWSPAPER_CONFIG = newspaper.Config()
+config_news(NEWSPAPER_CONFIG)
def sanitize_text(text: str) -> str:
@@ -113,6 +111,7 @@ def sanitize_text(text: str) -> str:
def extract_requirements(text_body: str) -> list:
"""Extract the sentences containing the keywords that denote a requirement.
+
the keywords are based on ISO/IEC directives, part 2:
https://www.iso.org/sites/directives/current/part2/index.xhtml
"""
@@ -263,10 +262,10 @@ def get_requirements(url: str, sourcetype: str) -> list:
results = []
try:
if sourcetype == "html":
- parser = newspaper.build(url, newspaper_config)
+ parser = newspaper.build(url, NEWSPAPER_CONFIG)
for article in parser.articles:
art = newspaper.Article(
- config=newspaper_config, url=article.url
+ config=NEWSPAPER_CONFIG, url=article.url
)
art.download()
art.parse()
@@ -285,7 +284,7 @@ def summarize_link_to_audio(url: str, summary: str) -> str:
"""Summarizes the text inside a given url into audio."""
result = str()
try:
- article = newspaper.Article(config=newspaper_config, url=url)
+ article = newspaper.Article(config=NEWSPAPER_CONFIG, url=url)
article.download()
article.parse()
if summary == "newspaper":
@@ -306,6 +305,7 @@ def summarize_link_to_audio(url: str, summary: str) -> str:
def search_wikipedia(search_term: str, summary: str) -> str:
"""Search wikipedia for a string and return the url.
+
reference: https://www.mediawiki.org/wiki/API:Opensearch.
"""
result = str()
@@ -343,7 +343,7 @@ def get_keywords_from_text(text: str) -> typing.List[str]:
app = fastapi.FastAPI(
title="Devourer",
- description=description,
+ description=MODULE_DESCRIPTION,
contact={
"name": "farzad sadeghi",
"url": "https://github.com/terminaldweller/devourer",
@@ -353,7 +353,7 @@ app = fastapi.FastAPI(
"name": "GPL v3.0",
"url": "https://www.gnu.org/licenses/gpl-3.0.en.html",
},
- openapi_tags=tags_metadata,
+ openapi_tags=TAGS_METADATA,
)
nltk.download("punkt")