author     terminaldweller <thabogre@gmail.com>  2021-10-24 06:19:49 +0000
committer  terminaldweller <thabogre@gmail.com>  2021-10-24 06:19:49 +0000
commit     a730529c8fbb791e0421e617d16f26636893e35d (patch)
tree       2b98c04dd74e7361d11f25a6fdfc44bdcf7caf6f
parent     black (diff)
update
Diffstat
-rw-r--r--  README.md     3
-rwxr-xr-x  devourer.py  38
2 files changed, 25 insertions, 16 deletions
diff --git a/README.md b/README.md
index d6a41a0..e08f72e 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,9 @@
# devourer
A knowledge aggregator
+## Usage Example
```sh
./devourer.py --singlelink --source https://en.wikipedia.org/wiki/I/O_virtualization
```
+
+
diff --git a/devourer.py b/devourer.py
index 94358dd..e869c4c 100755
--- a/devourer.py
+++ b/devourer.py
@@ -3,8 +3,6 @@
import argparse
import logging
-import subprocess
-import sys
import tika
import docker
import os
@@ -162,15 +160,6 @@ def configNews(config: Config) -> None:
config.browser_user_agent = "Chrome/91.0.4464.5"
-# TODO-should probably deprecate this at some point
-def call_from_shell_list(command_list: list):
- """Run a shell command given a list of command/arguments."""
- if sys.version_info < (3, 7):
- return subprocess.run(command_list, stdout=subprocess.PIPE)
- else:
- return subprocess.run(command_list, capture_output=True)
-
-
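The helper removed above only existed to paper over a Python version difference: `subprocess.run()` gained the `capture_output` parameter in Python 3.7, so older interpreters need an explicit pipe for the same effect. A minimal sketch of the two equivalent calls (the command list is a placeholder, not something from this repository):
```python
import subprocess
import sys

command_list = ["ls", "-l"]  # placeholder command for illustration

if sys.version_info < (3, 7):
    # pre-3.7: pipe stdout explicitly
    result = subprocess.run(command_list, stdout=subprocess.PIPE)
else:
    # 3.7+: capture_output=True pipes both stdout and stderr
    result = subprocess.run(command_list, capture_output=True)

print(result.stdout)
```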
def pdfToVoice(argparser: Argparser) -> None:
"""Main function for converting a pdf to an mp3."""
TIKA_SERVER_ENDPOINT = "127.0.0.1:9977"
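`pdfToVoice` points the tika client at a local server before doing anything else. Assuming the standard tika-python `parser` API (the rest of the function is outside this hunk, so the exact call is an assumption), text extraction against that endpoint would look roughly like:
```python
from tika import parser  # tika-python client library

TIKA_SERVER_ENDPOINT = "127.0.0.1:9977"

# "paper.pdf" is a hypothetical input path, for illustration only
parsed = parser.from_file(
    "paper.pdf", serverEndpoint="http://" + TIKA_SERVER_ENDPOINT
)
text = parsed["content"]  # extracted plain text, or None on failure
```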
@@ -192,10 +181,22 @@ def pdfToVoice(argparser: Argparser) -> None:
def extractRequirements(textBody: str) -> list:
- """Extract the sentences containing the keywords
- that denote a requirement."""
+ """Extract the sentences containing the keywords that denote a requirement.
+
+ The keywords are based on ISO/IEC Directives, Part 2:
+ https://www.iso.org/sites/directives/current/part2/index.xhtml
+ """
result = []
- REQ_KEYWORDS = ["shall", "should", "must", "may", "can", "could"]
+ REQ_KEYWORDS = [
+ "shall",
+ "shall not",
+ "should",
+ "should not",
+ "must",
+ "may",
+ "can",
+ "cannot",
+ ]
nltk.download("punkt")
sentences = nltk.sent_tokenize(textBody)
for sentence in sentences:
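The hunk ends before the loop body, but the docstring and the keyword list make the intent of `extractRequirements` clear: keep every sentence that contains one of the ISO/IEC Directives, Part 2 requirement verbs. A sketch of that shape, where the matching test inside the loop is an assumption rather than the committed code:
```python
import nltk

REQ_KEYWORDS = [
    "shall", "shall not", "should", "should not",
    "must", "may", "can", "cannot",
]


def extract_requirements(text_body: str) -> list:
    """Return the sentences of text_body that contain a requirement keyword."""
    nltk.download("punkt")  # sentence tokenizer model used by sent_tokenize
    result = []
    for sentence in nltk.sent_tokenize(text_body):
        # assumed matching test; the real loop body is not visible in this hunk
        if any(keyword in sentence.lower() for keyword in REQ_KEYWORDS):
            result.append(sentence)
    return result
```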
@@ -212,7 +213,9 @@ def summarizeText(text: str) -> str:
model = transformers.BartForConditionalGeneration.from_pretrained(
"facebook/bart-large-cnn"
)
- tokenizer = transformers.BartTokenizer.from_pretrained("facebook/bart-large-cnn")
+ tokenizer = transformers.BartTokenizer.from_pretrained(
+ "facebook/bart-large-cnn"
+ )
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
summary_ids = model.generate(
inputs["input_ids"], num_beams=4, max_length=5, early_stopping=True
@@ -279,7 +282,10 @@ def summarizeLinksToAudio(argparser: Argparser) -> None:
def searchWikipedia(argparser: Argparser) -> str:
- """Search wikipedia for a string and return the url."""
+ """Search wikipedia for a string and return the url.
+
+ reference: https://www.mediawiki.org/wiki/API:Opensearch
+ """
searchParmas = {
"action": "opensearch",
"namespace": "0",