about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- devourer.py | 17
-rwxr-xr-x tests.sh | 3
2 files changed, 13 insertions, 7 deletions
diff --git a/devourer.py b/devourer.py
index 0a64e8e..75f469b 100644
--- a/devourer.py
+++ b/devourer.py
@@ -131,7 +131,7 @@ def extractRequirements(textBody: str) -> list:
sentences = nltk.sent_tokenize(textBody)
for sentence in sentences:
for keyword in REQ_KEYWORDS:
- if sentence.find(keyword) >= 0:
+ if sentence.casefold().find(keyword) >= 0:
result.append(sentence)
return result
@@ -183,17 +183,22 @@ def getRequirements(url: str, sourcetype: str) -> list:
a = Article(article.url)
a.download()
a.parse()
+ a.nlp()
doc = Document(a.html)
+ print(doc)
# print(doc.summary())
- results = extractRequirements(doc.summary())
+ # results = extractRequirements(doc.summary())
+ results = extractRequirements(doc)
elif sourcetype == "text":
bytesText = simpleGet(url)
results = extractRequirements(bytesText.decode("utf-8"))
except Exception as e:
logging.exception(e)
finally:
- result = "".join(results + "\n")
- return result
+ print(result)
+ # result = "".join(results) + "\n"
+ # return result
+ return results
# FIXME-summary=bart doesnt work
@@ -284,10 +289,10 @@ def pdf_to_audio_ep(url: str):
@app.get("/mila/reqs")
def extract_reqs_ep(url: str, sourcetype: str = "html"):
"""extracts the requirements from a given url"""
- result = getRequirements()
+ result = getRequirements(url, sourcetype)
return {
"Content-Type": "application/json",
- "isOK": True if result != "" else False,
+ "isOK": True if result is not None else False,
"reqs": result,
}
diff --git a/tests.sh b/tests.sh
index 4e895b6..37712fd 100755
--- a/tests.sh
+++ b/tests.sh
@@ -1,4 +1,5 @@
#!/usr/bin/env sh
-curl -k -X GET https://localhost:19019/mila/summ?url=https://dilipkumar.medium.com/standalone-mongodb-on-kubernetes-cluster-19e7b5896b27&summary=newspaper&audio=false
+curl -k -X GET https://localhost:19019/mila/summ?url=https://dilipkumar.medium.com/standalone-mongodb-on-kubernetes-cluster-19e7b5896b27&summary=newspaper&audio=true
curl -k -X GET https://localhost:19019/mila/wiki?term=iommu
+curl -k -X GET https://localhost:19019/mila/reqs?url=https://www.ietf.org/rfc/rfc2865.txt&sourcetype=text