diff options
author | terminaldweller <thabogre@gmail.com> | 2022-05-19 08:11:02 +0000 |
---|---|---|
committer | terminaldweller <thabogre@gmail.com> | 2022-05-19 08:11:02 +0000 |
commit | c081f7ae09c770f63cc70a12fcb6901ad5ebd344 (patch) | |
tree | 9b75b2f00671c24eeb0c00f0e2034ee66eb269dd | |
parent | update (diff) | |
download | devourer-old.tar.gz devourer-old.zip |
push before a new branch (branch: old)
-rw-r--r-- | devourer.py | 5 | ||||
-rw-r--r-- | poetry.lock | 169 | ||||
-rw-r--r-- | pyproject.toml | 1 |
3 files changed, 173 insertions, 2 deletions
diff --git a/devourer.py b/devourer.py index 995ab52..34185ae 100644 --- a/devourer.py +++ b/devourer.py @@ -180,6 +180,10 @@ def summarizeText(text: str) -> str: ] +def summarizeText_v2(text: str) -> str: + pass + + def textToAudio(text: str) -> str: """Transform the given text into audio.""" path = str() @@ -326,6 +330,7 @@ async def addSecureHeaders( nltk.download("punkt") +transformers_summarizer = transformers.pipeline("summarization") @app.get("/mila/pdf") diff --git a/poetry.lock b/poetry.lock index 2ab217a..55df0df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -49,6 +49,28 @@ python-versions = "*" six = ">=1.6.1,<2.0" [[package]] +name = "autosemver" +version = "0.5.5" +description = "Tools to handle automatic semantic versioning in python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +dulwich = ">=0.19.6,<0.20" + +[[package]] +name = "babel" +version = "2.10.1" +description = "Internationalization utilities" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pytz = ">=2015.7" + +[[package]] name = "beautifulsoup4" version = "4.10.0" description = "Screen-scraping library" @@ -126,6 +148,23 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] +name = "dulwich" +version = "0.19.16" +description = "Python Git Library" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +certifi = "*" +urllib3 = ">=1.24.1" + +[package.extras] +fastimport = ["fastimport"] +pgp = ["gpg"] +https = ["urllib3[secure] (>=1.24.1)"] + +[[package]] name = "fastapi" version = "0.70.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" @@ -339,6 +378,26 @@ perf = ["ipython"] testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", 
"pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] [[package]] +name = "inspire-utils" +version = "3.0.25" +description = "INSPIRE-specific utils." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +babel = ">=2.5.1,<3.0" +lxml = ">=4.4.0,<5.0" +nameparser = ">=0.5.3,<1.0" +python-dateutil = ">=2.6.1,<3.0" +six = ">=1.10.0,<2.0" +Unidecode = ">=1.0.22,<2.0" + +[package.extras] +all = ["flake8-future-import (>=0.4.3,<1.0)", "mock (>=2.0.0,<3.0)", "pytest-cov (>=2.5.1,<3.0)", "pytest (>=4.6.0,<5.0)", "unicode-string-literal (>=1.1,<2.0)"] +tests = ["flake8-future-import (>=0.4.3,<1.0)", "mock (>=2.0.0,<3.0)", "pytest-cov (>=2.5.1,<3.0)", "pytest (>=4.6.0,<5.0)", "unicode-string-literal (>=1.1,<2.0)"] + +[[package]] name = "jieba3k" version = "0.35.1" description = "Chinese Words Segementation Utilities" @@ -416,6 +475,14 @@ importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} testing = ["coverage", "pyyaml"] [[package]] +name = "nameparser" +version = "0.5.8" +description = "A simple Python module for parsing human names into their individual components." +category = "main" +optional = false +python-versions = "*" + +[[package]] name = "newspaper3k" version = "0.2.8" description = "Simplified python article discovery & extraction." 
@@ -569,6 +636,14 @@ python-versions = ">=3.6" diagrams = ["jinja2", "railroad-diagrams"] [[package]] +name = "pypdf2" +version = "1.27.12" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +category = "main" +optional = false +python-versions = ">=2.7" + +[[package]] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" @@ -580,6 +655,22 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" six = ">=1.5" [[package]] +name = "python-magic" +version = "0.4.25" +description = "File type identification using libmagic" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pytz" +version = "2022.1" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" + +[[package]] name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" @@ -604,6 +695,28 @@ lxml = "*" test = ["timeout-decorator"] [[package]] +name = "refextract" +version = "1.1.4" +description = "Small library for extracting references used in scholarly communication." 
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +autosemver = ">=0.5.3,<1.0" +inspire-utils = ">=3.0.0,<4.0" +PyPDF2 = ">=1.26.0,<2.0" +python-magic = ">=0.4.15,<1.0" +requests = ">=2.18.4,<3.0" +six = ">=1.10.0,<2.0" +unidecode = ">=1.0.22,<2.0" + +[package.extras] +all = ["Sphinx (>=1.7.1,<2.0)", "flake8-future-import (>=0.4.4,<1.0)", "flake8 (>=3.5.0,<4.0)", "pytest-cov (>=2.10,<3.0)", "pytest (>=4.6,<5.0)", "responses (>=0.8.1,<1.0)"] +docs = ["Sphinx (>=1.7.1,<2.0)"] +tests = ["flake8-future-import (>=0.4.4,<1.0)", "flake8 (>=3.5.0,<4.0)", "pytest-cov (>=2.10,<3.0)", "pytest (>=4.6,<5.0)", "responses (>=0.8.1,<1.0)", "unicode-string-literal (>=1.1,<2.0)"] + +[[package]] name = "regex" version = "2022.3.15" description = "Alternative regular expression module, to replace re." @@ -966,6 +1079,14 @@ optional = false python-versions = ">=3.6" [[package]] +name = "unidecode" +version = "1.3.4" +description = "ASCII transliterations of Unicode text" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] name = "urllib3" version = "1.26.9" description = "HTTP library with thread-safe connection pooling, file post, and more." 
@@ -1027,8 +1148,8 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" -python-versions = "3.8" -content-hash = "58cfe63717206b7a24f611a261b822e8ba646539e66a33a007c88a56e7f1afc6" +python-versions = "^3.8" +content-hash = "f7a64a6a1da08676ad0effa163082f02ff87d65ae0e3f4a3f6a2e87607ee1cec" [metadata.files] absl-py = [ @@ -1047,6 +1168,13 @@ astunparse = [ {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, ] +autosemver = [ + {file = "autosemver-0.5.5.tar.gz", hash = "sha256:0af1e8a9c3604545c067311f1c26403e8f0d60b5d9561c0217e14eee21c98b02"}, +] +babel = [ + {file = "Babel-2.10.1-py3-none-any.whl", hash = "sha256:3f349e85ad3154559ac4930c3918247d319f21910d5ce4b25d439ed8693b98d2"}, + {file = "Babel-2.10.1.tar.gz", hash = "sha256:98aeaca086133efb3e1e2aad0396987490c8425929ddbcfe0550184fdc54cd13"}, +] beautifulsoup4 = [ {file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"}, {file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"}, @@ -1079,6 +1207,15 @@ cssselect = [ {file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"}, {file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"}, ] +dulwich = [ + {file = "dulwich-0.19.16-cp27-cp27m-win32.whl", hash = "sha256:267160904e9a1cb6c248c5efc53597a35d038ecc6f60bdc4546b3053bed11982"}, + {file = "dulwich-0.19.16-cp27-cp27m-win_amd64.whl", hash = "sha256:4e3aba5e4844e7c700721c1fc696987ea820ee3528a03604dc4e74eff4196826"}, + {file = "dulwich-0.19.16-cp35-cp35m-win_amd64.whl", hash = 
"sha256:60bb2c2c92f5025c1b53a556304008f0f624c98ae36f22d870e056b2d4236c11"}, + {file = "dulwich-0.19.16-cp36-cp36m-win_amd64.whl", hash = "sha256:f00d132082b8fcc2eb0d722abc773d4aeb5558c1475d7edd1f0f571146c29db9"}, + {file = "dulwich-0.19.16-py2-none-any.whl", hash = "sha256:10699277c6268d0c16febe141a5b1c1a6e9744f3144c2d2de1706f4b1adafe63"}, + {file = "dulwich-0.19.16-py3-none-any.whl", hash = "sha256:dddae02d372fc3b5cfb0046d0f62246ef281fa0c088df7601ab5916607add94b"}, + {file = "dulwich-0.19.16.tar.gz", hash = "sha256:f74561c448bfb6f04c07de731c1181ae4280017f759b0bb04fa5770aa84ca850"}, +] fastapi = [ {file = "fastapi-0.70.1-py3-none-any.whl", hash = "sha256:5367226c7bcd7bfb2e17edaf225fd9a983095b1372281e9a3eb661336fb93748"}, {file = "fastapi-0.70.1.tar.gz", hash = "sha256:21d03979b5336375c66fa5d1f3126c6beca650d5d2166fbb78345a30d33c8d06"}, @@ -1208,6 +1345,10 @@ importlib-metadata = [ {file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"}, {file = "importlib_metadata-4.11.3.tar.gz", hash = "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"}, ] +inspire-utils = [ + {file = "inspire-utils-3.0.25.tar.gz", hash = "sha256:55042ac7e3669e9710d218f8163b96f6f771fe670bf00fdb156ef774e1a7a5eb"}, + {file = "inspire_utils-3.0.25-py2-none-any.whl", hash = "sha256:043827c41182413c82809656487025a5ab76a483046a59b2e946a5d9a9d2fbfe"}, +] jieba3k = [ {file = "jieba3k-0.35.1.zip", hash = "sha256:980a4f2636b778d312518066be90c7697d410dd5a472385f5afced71a2db1c10"}, ] @@ -1299,6 +1440,10 @@ markdown = [ {file = "Markdown-3.3.6-py3-none-any.whl", hash = "sha256:9923332318f843411e9932237530df53162e29dc7a4e2b91e35764583c46c9a3"}, {file = "Markdown-3.3.6.tar.gz", hash = "sha256:76df8ae32294ec39dcf89340382882dfa12975f87f45c3ed1ecdb1e8cefc7006"}, ] +nameparser = [ + {file = "nameparser-0.5.8-py2.py3-none-any.whl", hash = "sha256:550bf4df749d4aca23b9fad486ed67ddc042c89234840fea106dcb8d5a3c4365"}, + 
{file = "nameparser-0.5.8.tar.gz", hash = "sha256:1089efbd576c917cd6970968c2735dc6e30f20d64318c4207124476270df7afc"}, +] newspaper3k = [ {file = "newspaper3k-0.2.8-py3-none-any.whl", hash = "sha256:44a864222633d3081113d1030615991c3dbba87239f6bbf59d91240f71a22e3e"}, {file = "newspaper3k-0.2.8.tar.gz", hash = "sha256:9f1bd3e1fb48f400c715abf875cc7b0a67b7ddcd87f50c9aeeb8fcbbbd9004fb"}, @@ -1477,10 +1622,22 @@ pyparsing = [ {file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"}, {file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"}, ] +pypdf2 = [ + {file = "PyPDF2-1.27.12-py3-none-any.whl", hash = "sha256:9416c347b4c03391caf7562486bec0fd129bbb6a3359eefe4a0b758d0e3dc20c"}, + {file = "PyPDF2-1.27.12.tar.gz", hash = "sha256:20929fad10a3b4890862f65f3a46f563cfdf53132faae5193b54e18658467a60"}, +] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] +python-magic = [ + {file = "python-magic-0.4.25.tar.gz", hash = "sha256:21f5f542aa0330f5c8a64442528542f6215c8e18d2466b399b0d9d39356d83fc"}, + {file = "python_magic-0.4.25-py2.py3-none-any.whl", hash = "sha256:1a2c81e8f395c744536369790bd75094665e9644110a6623bcc3bbea30f03973"}, +] +pytz = [ + {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"}, + {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"}, +] pyyaml = [ {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, @@ -1520,6 +1677,10 @@ readability-lxml = [ {file = "readability-lxml-0.8.1.tar.gz", hash = "sha256:e51fea56b5909aaf886d307d48e79e096293255afa567b7d08bca94d25b1a4e1"}, {file = "readability_lxml-0.8.1-py3-none-any.whl", hash = "sha256:e0d366a21b1bd6cca17de71a4e6ea16fcfaa8b0a5b4004e39e2c7eff884e6305"}, ] +refextract = [ + {file = "refextract-1.1.4-py3-none-any.whl", hash = "sha256:f8d0dc69289230eec861a40b5956f88ec1661294e1d0f419730429cef40a0f14"}, + {file = "refextract-1.1.4.tar.gz", hash = "sha256:b04b2223c8b80bec4d130e1f95eef5102c8483914f30c35372c19dbcef43480c"}, +] regex = [ {file = "regex-2022.3.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:42eb13b93765c6698a5ab3bcd318d8c39bb42e5fa8a7fcf7d8d98923f3babdb1"}, {file = "regex-2022.3.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9beb03ff6fe509d6455971c2489dceb31687b38781206bcec8e68bdfcf5f1db2"}, @@ -1761,6 +1922,10 @@ typing-extensions = [ {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"}, {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"}, ] +unidecode = [ + {file = "Unidecode-1.3.4-py3-none-any.whl", hash = "sha256:afa04efcdd818a93237574791be9b2817d7077c25a068b00f8cff7baa4e59257"}, + {file = "Unidecode-1.3.4.tar.gz", hash = "sha256:8e4352fb93d5a735c788110d2e7ac8e8031eb06ccbfe8d324ab71735015f9342"}, +] urllib3 = [ {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, diff --git a/pyproject.toml b/pyproject.toml index 82a7025..2f79961 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ torch = "^1.9.1" transformers = "^4.11.2" fastapi = "^0.70.0" uvicorn = "^0.15.0" 
+refextract = "^1.1.4" [tool.poetry.dev-dependencies] |