about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author terminaldweller <thabogre@gmail.com> 2022-05-19 08:11:02 +0000
committer terminaldweller <thabogre@gmail.com> 2022-05-19 08:11:02 +0000
commit c081f7ae09c770f63cc70a12fcb6901ad5ebd344 (patch)
tree 9b75b2f00671c24eeb0c00f0e2034ee66eb269dd
parent update (diff)
download devourer-c081f7ae09c770f63cc70a12fcb6901ad5ebd344.tar.gz
devourer-c081f7ae09c770f63cc70a12fcb6901ad5ebd344.zip
push before a new branch [old]
-rw-r--r-- devourer.py 5
-rw-r--r-- poetry.lock 169
-rw-r--r-- pyproject.toml 1
3 files changed, 173 insertions, 2 deletions
diff --git a/devourer.py b/devourer.py
index 995ab52..34185ae 100644
--- a/devourer.py
+++ b/devourer.py
@@ -180,6 +180,10 @@ def summarizeText(text: str) -> str:
]
+def summarizeText_v2(text: str) -> str:
+ pass
+
+
def textToAudio(text: str) -> str:
"""Transform the given text into audio."""
path = str()
@@ -326,6 +330,7 @@ async def addSecureHeaders(
nltk.download("punkt")
+transformers_summarizer = transformers.pipeline("summarization")
@app.get("/mila/pdf")
diff --git a/poetry.lock b/poetry.lock
index 2ab217a..55df0df 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -49,6 +49,28 @@ python-versions = "*"
six = ">=1.6.1,<2.0"
[[package]]
+name = "autosemver"
+version = "0.5.5"
+description = "Tools to handle automatic semantic versioning in python"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+dulwich = ">=0.19.6,<0.20"
+
+[[package]]
+name = "babel"
+version = "2.10.1"
+description = "Internationalization utilities"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+pytz = ">=2015.7"
+
+[[package]]
name = "beautifulsoup4"
version = "4.10.0"
description = "Screen-scraping library"
@@ -126,6 +148,23 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
+name = "dulwich"
+version = "0.19.16"
+description = "Python Git Library"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+certifi = "*"
+urllib3 = ">=1.24.1"
+
+[package.extras]
+fastimport = ["fastimport"]
+pgp = ["gpg"]
+https = ["urllib3[secure] (>=1.24.1)"]
+
+[[package]]
name = "fastapi"
version = "0.70.1"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
@@ -339,6 +378,26 @@ perf = ["ipython"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"]
[[package]]
+name = "inspire-utils"
+version = "3.0.25"
+description = "INSPIRE-specific utils."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+babel = ">=2.5.1,<3.0"
+lxml = ">=4.4.0,<5.0"
+nameparser = ">=0.5.3,<1.0"
+python-dateutil = ">=2.6.1,<3.0"
+six = ">=1.10.0,<2.0"
+Unidecode = ">=1.0.22,<2.0"
+
+[package.extras]
+all = ["flake8-future-import (>=0.4.3,<1.0)", "mock (>=2.0.0,<3.0)", "pytest-cov (>=2.5.1,<3.0)", "pytest (>=4.6.0,<5.0)", "unicode-string-literal (>=1.1,<2.0)"]
+tests = ["flake8-future-import (>=0.4.3,<1.0)", "mock (>=2.0.0,<3.0)", "pytest-cov (>=2.5.1,<3.0)", "pytest (>=4.6.0,<5.0)", "unicode-string-literal (>=1.1,<2.0)"]
+
+[[package]]
name = "jieba3k"
version = "0.35.1"
description = "Chinese Words Segementation Utilities"
@@ -416,6 +475,14 @@ importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""}
testing = ["coverage", "pyyaml"]
[[package]]
+name = "nameparser"
+version = "0.5.8"
+description = "A simple Python module for parsing human names into their individual components."
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
name = "newspaper3k"
version = "0.2.8"
description = "Simplified python article discovery & extraction."
@@ -569,6 +636,14 @@ python-versions = ">=3.6"
diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
+name = "pypdf2"
+version = "1.27.12"
+description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
+category = "main"
+optional = false
+python-versions = ">=2.7"
+
+[[package]]
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
@@ -580,6 +655,22 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
six = ">=1.5"
[[package]]
+name = "python-magic"
+version = "0.4.25"
+description = "File type identification using libmagic"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "pytz"
+version = "2022.1"
+description = "World timezone definitions, modern and historical"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
name = "pyyaml"
version = "6.0"
description = "YAML parser and emitter for Python"
@@ -604,6 +695,28 @@ lxml = "*"
test = ["timeout-decorator"]
[[package]]
+name = "refextract"
+version = "1.1.4"
+description = "Small library for extracting references used in scholarly communication."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+autosemver = ">=0.5.3,<1.0"
+inspire-utils = ">=3.0.0,<4.0"
+PyPDF2 = ">=1.26.0,<2.0"
+python-magic = ">=0.4.15,<1.0"
+requests = ">=2.18.4,<3.0"
+six = ">=1.10.0,<2.0"
+unidecode = ">=1.0.22,<2.0"
+
+[package.extras]
+all = ["Sphinx (>=1.7.1,<2.0)", "flake8-future-import (>=0.4.4,<1.0)", "flake8 (>=3.5.0,<4.0)", "pytest-cov (>=2.10,<3.0)", "pytest (>=4.6,<5.0)", "responses (>=0.8.1,<1.0)"]
+docs = ["Sphinx (>=1.7.1,<2.0)"]
+tests = ["flake8-future-import (>=0.4.4,<1.0)", "flake8 (>=3.5.0,<4.0)", "pytest-cov (>=2.10,<3.0)", "pytest (>=4.6,<5.0)", "responses (>=0.8.1,<1.0)", "unicode-string-literal (>=1.1,<2.0)"]
+
+[[package]]
name = "regex"
version = "2022.3.15"
description = "Alternative regular expression module, to replace re."
@@ -966,6 +1079,14 @@ optional = false
python-versions = ">=3.6"
[[package]]
+name = "unidecode"
+version = "1.3.4"
+description = "ASCII transliterations of Unicode text"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[[package]]
name = "urllib3"
version = "1.26.9"
description = "HTTP library with thread-safe connection pooling, file post, and more."
@@ -1027,8 +1148,8 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
[metadata]
lock-version = "1.1"
-python-versions = "3.8"
-content-hash = "58cfe63717206b7a24f611a261b822e8ba646539e66a33a007c88a56e7f1afc6"
+python-versions = "^3.8"
+content-hash = "f7a64a6a1da08676ad0effa163082f02ff87d65ae0e3f4a3f6a2e87607ee1cec"
[metadata.files]
absl-py = [
@@ -1047,6 +1168,13 @@ astunparse = [
{file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"},
{file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"},
]
+autosemver = [
+ {file = "autosemver-0.5.5.tar.gz", hash = "sha256:0af1e8a9c3604545c067311f1c26403e8f0d60b5d9561c0217e14eee21c98b02"},
+]
+babel = [
+ {file = "Babel-2.10.1-py3-none-any.whl", hash = "sha256:3f349e85ad3154559ac4930c3918247d319f21910d5ce4b25d439ed8693b98d2"},
+ {file = "Babel-2.10.1.tar.gz", hash = "sha256:98aeaca086133efb3e1e2aad0396987490c8425929ddbcfe0550184fdc54cd13"},
+]
beautifulsoup4 = [
{file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"},
{file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"},
@@ -1079,6 +1207,15 @@ cssselect = [
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
{file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"},
]
+dulwich = [
+ {file = "dulwich-0.19.16-cp27-cp27m-win32.whl", hash = "sha256:267160904e9a1cb6c248c5efc53597a35d038ecc6f60bdc4546b3053bed11982"},
+ {file = "dulwich-0.19.16-cp27-cp27m-win_amd64.whl", hash = "sha256:4e3aba5e4844e7c700721c1fc696987ea820ee3528a03604dc4e74eff4196826"},
+ {file = "dulwich-0.19.16-cp35-cp35m-win_amd64.whl", hash = "sha256:60bb2c2c92f5025c1b53a556304008f0f624c98ae36f22d870e056b2d4236c11"},
+ {file = "dulwich-0.19.16-cp36-cp36m-win_amd64.whl", hash = "sha256:f00d132082b8fcc2eb0d722abc773d4aeb5558c1475d7edd1f0f571146c29db9"},
+ {file = "dulwich-0.19.16-py2-none-any.whl", hash = "sha256:10699277c6268d0c16febe141a5b1c1a6e9744f3144c2d2de1706f4b1adafe63"},
+ {file = "dulwich-0.19.16-py3-none-any.whl", hash = "sha256:dddae02d372fc3b5cfb0046d0f62246ef281fa0c088df7601ab5916607add94b"},
+ {file = "dulwich-0.19.16.tar.gz", hash = "sha256:f74561c448bfb6f04c07de731c1181ae4280017f759b0bb04fa5770aa84ca850"},
+]
fastapi = [
{file = "fastapi-0.70.1-py3-none-any.whl", hash = "sha256:5367226c7bcd7bfb2e17edaf225fd9a983095b1372281e9a3eb661336fb93748"},
{file = "fastapi-0.70.1.tar.gz", hash = "sha256:21d03979b5336375c66fa5d1f3126c6beca650d5d2166fbb78345a30d33c8d06"},
@@ -1208,6 +1345,10 @@ importlib-metadata = [
{file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"},
{file = "importlib_metadata-4.11.3.tar.gz", hash = "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"},
]
+inspire-utils = [
+ {file = "inspire-utils-3.0.25.tar.gz", hash = "sha256:55042ac7e3669e9710d218f8163b96f6f771fe670bf00fdb156ef774e1a7a5eb"},
+ {file = "inspire_utils-3.0.25-py2-none-any.whl", hash = "sha256:043827c41182413c82809656487025a5ab76a483046a59b2e946a5d9a9d2fbfe"},
+]
jieba3k = [
{file = "jieba3k-0.35.1.zip", hash = "sha256:980a4f2636b778d312518066be90c7697d410dd5a472385f5afced71a2db1c10"},
]
@@ -1299,6 +1440,10 @@ markdown = [
{file = "Markdown-3.3.6-py3-none-any.whl", hash = "sha256:9923332318f843411e9932237530df53162e29dc7a4e2b91e35764583c46c9a3"},
{file = "Markdown-3.3.6.tar.gz", hash = "sha256:76df8ae32294ec39dcf89340382882dfa12975f87f45c3ed1ecdb1e8cefc7006"},
]
+nameparser = [
+ {file = "nameparser-0.5.8-py2.py3-none-any.whl", hash = "sha256:550bf4df749d4aca23b9fad486ed67ddc042c89234840fea106dcb8d5a3c4365"},
+ {file = "nameparser-0.5.8.tar.gz", hash = "sha256:1089efbd576c917cd6970968c2735dc6e30f20d64318c4207124476270df7afc"},
+]
newspaper3k = [
{file = "newspaper3k-0.2.8-py3-none-any.whl", hash = "sha256:44a864222633d3081113d1030615991c3dbba87239f6bbf59d91240f71a22e3e"},
{file = "newspaper3k-0.2.8.tar.gz", hash = "sha256:9f1bd3e1fb48f400c715abf875cc7b0a67b7ddcd87f50c9aeeb8fcbbbd9004fb"},
@@ -1477,10 +1622,22 @@ pyparsing = [
{file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"},
{file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"},
]
+pypdf2 = [
+ {file = "PyPDF2-1.27.12-py3-none-any.whl", hash = "sha256:9416c347b4c03391caf7562486bec0fd129bbb6a3359eefe4a0b758d0e3dc20c"},
+ {file = "PyPDF2-1.27.12.tar.gz", hash = "sha256:20929fad10a3b4890862f65f3a46f563cfdf53132faae5193b54e18658467a60"},
+]
python-dateutil = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]
+python-magic = [
+ {file = "python-magic-0.4.25.tar.gz", hash = "sha256:21f5f542aa0330f5c8a64442528542f6215c8e18d2466b399b0d9d39356d83fc"},
+ {file = "python_magic-0.4.25-py2.py3-none-any.whl", hash = "sha256:1a2c81e8f395c744536369790bd75094665e9644110a6623bcc3bbea30f03973"},
+]
+pytz = [
+ {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"},
+ {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"},
+]
pyyaml = [
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
{file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
@@ -1520,6 +1677,10 @@ readability-lxml = [
{file = "readability-lxml-0.8.1.tar.gz", hash = "sha256:e51fea56b5909aaf886d307d48e79e096293255afa567b7d08bca94d25b1a4e1"},
{file = "readability_lxml-0.8.1-py3-none-any.whl", hash = "sha256:e0d366a21b1bd6cca17de71a4e6ea16fcfaa8b0a5b4004e39e2c7eff884e6305"},
]
+refextract = [
+ {file = "refextract-1.1.4-py3-none-any.whl", hash = "sha256:f8d0dc69289230eec861a40b5956f88ec1661294e1d0f419730429cef40a0f14"},
+ {file = "refextract-1.1.4.tar.gz", hash = "sha256:b04b2223c8b80bec4d130e1f95eef5102c8483914f30c35372c19dbcef43480c"},
+]
regex = [
{file = "regex-2022.3.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:42eb13b93765c6698a5ab3bcd318d8c39bb42e5fa8a7fcf7d8d98923f3babdb1"},
{file = "regex-2022.3.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9beb03ff6fe509d6455971c2489dceb31687b38781206bcec8e68bdfcf5f1db2"},
@@ -1761,6 +1922,10 @@ typing-extensions = [
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
]
+unidecode = [
+ {file = "Unidecode-1.3.4-py3-none-any.whl", hash = "sha256:afa04efcdd818a93237574791be9b2817d7077c25a068b00f8cff7baa4e59257"},
+ {file = "Unidecode-1.3.4.tar.gz", hash = "sha256:8e4352fb93d5a735c788110d2e7ac8e8031eb06ccbfe8d324ab71735015f9342"},
+]
urllib3 = [
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
diff --git a/pyproject.toml b/pyproject.toml
index 82a7025..2f79961 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ torch = "^1.9.1"
transformers = "^4.11.2"
fastapi = "^0.70.0"
uvicorn = "^0.15.0"
+refextract = "^1.1.4"
[tool.poetry.dev-dependencies]