diff options
| -rw-r--r-- | devourer.py | 17 | ||||
| -rwxr-xr-x | tests.sh | 3 | 
2 files changed, 13 insertions, 7 deletions
diff --git a/devourer.py b/devourer.py index 0a64e8e..75f469b 100644 --- a/devourer.py +++ b/devourer.py @@ -131,7 +131,7 @@ def extractRequirements(textBody: str) -> list:      sentences = nltk.sent_tokenize(textBody)      for sentence in sentences:          for keyword in REQ_KEYWORDS: -            if sentence.find(keyword) >= 0: +            if sentence.casefold().find(keyword) >= 0:                  result.append(sentence)      return result @@ -183,17 +183,22 @@ def getRequirements(url: str, sourcetype: str) -> list:                  a = Article(article.url)                  a.download()                  a.parse() +                a.nlp()                  doc = Document(a.html) +                print(doc)                  # print(doc.summary()) -                results = extractRequirements(doc.summary()) +                # results = extractRequirements(doc.summary()) +                results = extractRequirements(doc)          elif sourcetype == "text":              bytesText = simpleGet(url)              results = extractRequirements(bytesText.decode("utf-8"))      except Exception as e:          logging.exception(e)      finally: -        result = "".join(results + "\n") -        return result +        print(result) +        # result = "".join(results) + "\n" +        # return result +        return results  # FIXME-summary=bart doesnt work @@ -284,10 +289,10 @@ def pdf_to_audio_ep(url: str):  @app.get("/mila/reqs")  def extract_reqs_ep(url: str, sourcetype: str = "html"):      """extracts the requirements from a given url""" -    result = getRequirements() +    result = getRequirements(url, sourcetype)      return {          "Content-Type": "application/json", -        "isOK": True if result != "" else False, +        "isOK": True if result is not None else False,          "reqs": result,      } @@ -1,4 +1,5 @@  #!/usr/bin/env sh -curl -k -X GET https://localhost:19019/mila/summ?url=https://dilipkumar.medium.com/standalone-mongodb-on-kubernetes-cluster-19e7b5896b27&summary=newspaper&audio=false +curl -k -X GET https://localhost:19019/mila/summ?url=https://dilipkumar.medium.com/standalone-mongodb-on-kubernetes-cluster-19e7b5896b27&summary=newspaper&audio=true  curl -k -X GET https://localhost:19019/mila/wiki?term=iommu +curl -k -X GET https://localhost:19019/mila/reqs?url=https://www.ietf.org/rfc/rfc2865.txt&sourcetype=text  | 
