1 files changed, 187 insertions, 143 deletions
diff --git a/devourer.py b/devourer.py
index e869c4c..cc25206 100755..100644
--- a/devourer.py
+++ b/devourer.py
@@ -1,14 +1,11 @@
-#!/usr/bin/env python3
 # _*_ coding=utf-8 _*_
 
-import argparse
 import logging
 import tika
-import docker
-import os
 import nltk
 import random
 import string
+import os
 from newspaper import Article, build, Config
 from bs4 import BeautifulSoup
 from contextlib import closing
@@ -18,69 +15,7 @@ from re import findall
 from readability import Document
 from gtts import gTTS
 from datetime import datetime as time
-
-
-WIKIPEDIA_SEARCH_URL = "https://en.wikipedia.org/w/api.php"
-
-
-class Argparser(object):
-    def __init__(self):
-        parser = argparse.ArgumentParser()
-        parser.add_argument(
-            "--source",
-            type=str,
-            help="the url where the \
-                            urls to be extracted reside",
-            default="",
-        )
-        parser.add_argument(
-            "--out",
-            type=str,
-            help="the output file name if it applies",
-            default="",
-        )
-        parser.add_argument(
-            "--singlelink",
-            action="store_true",
-            help="whether the app should work in single-link \
-                            meaning only one page's contents will be used \
-                            mode",
-            default=False,
-        )
-        parser.add_argument(
-            "--multilink",
-            action="store_true",
-            help="whether the app should work in multi-link \
-                            mode meaning the srouce contians a list of links \
-                            rather than being the actual source itself",
-            default=False,
-        )
-        parser.add_argument(
-            "--sourcetype",
-            type=str,
-            help="determines the type of the \
-                            source:html,text,...",
-            default="html",
-        )
-        parser.add_argument(
-            "--pdftomp3",
-            action="store_true",
-            default=False,
-            help="convert pdf to mp3. \
-                            source should be the path to a pdf file and\
-                            out should be the path to the mp3 output file",
-        )
-        parser.add_argument(
-            "--summary",
-            type=str,
-            default="newspaper",
-            help="which summary type to use. currently we \
-                            have newspaper, bart and none.",
-        )
-        parser.add_argument(
-            "--search", type=str, default="", help="the string to search for"
-        )
-        self.args = parser.parse_args()
+from fastapi import FastAPI
 
 
 # FIXME-maybe actually really do some logging
@@ -109,7 +44,7 @@ def simpleGet(url: str) -> bytes:
 
 
 def getWithParams(url: str, params: dict) -> dict:
-    """Issues a get requesti with params."""
+    """Issues a get request with params."""
     try:
         with closing(get(url, params=params, stream=True)) as resp:
             if isAGoodResponse(resp):
@@ -160,24 +95,19 @@ def configNews(config: Config) -> None:
     config.browser_user_agent = "Chrome/91.0.4464.5"
 
 
-def pdfToVoice(argparser: Argparser) -> None:
+# FIXME-have to decide whether to use files or urls
+def pdfToVoice() -> str:
     """Main function for converting a pdf to an mp3."""
-    TIKA_SERVER_ENDPOINT = "127.0.0.1:9977"
-    os.environ["TIKA_SERVER_ENDPOINT"] = TIKA_SERVER_ENDPOINT
-    dockerClient = docker.from_env()
-    container = dockerClient.containers.run(
-        "apache/tika:2.0.0", detach=True, ports={TIKA_SERVER_ENDPOINT: "9998"}
-    )
-    while True:
-        resp = get("http://127.0.0.1:9977")
-        if resp.status_code == 200:
-            break
-        time.sleep(0.5)
-    rawText = tika.parser.from_file()
-    tts = gTTS(rawText["content"])
-    tts.save(argparser.args.out)
-    container.stop()
-    dockerClient.close()
+    outfile = str()
+    try:
+        rawText = tika.parser.from_file()
+        tts = gTTS(rawText["content"])
+        outfile = getRandStr(20) + ".mp3"
+        tts.save(outfile)
+    except Exception as e:
+        logging.exception(e)
+    finally:
+        return outfile
 
 
 def extractRequirements(textBody: str) -> list:
@@ -228,90 +158,204 @@ def summarizeText(text: str) -> str:
     ]
 
 
-def textToAudio(text: str) -> None:
+def textToAudio(text: str) -> str:
     """Transform the given text into audio."""
-    tts = gTTS(text)
-    tts.save(time.today().strftime("%b-%d-%Y-%M-%S-%f") + ".mp3")
+    try:
+        path = str()
+        path = (
+            os.environ["AUDIO_DUMP_DIR"]
+            + time.today().strftime("%b-%d-%Y-%M-%S-%f")
+            + ".mp3"
+        )
+        tts = gTTS(text)
+        tts.save(path)
+    except Exception as e:
+        logging.exception(e)
+    finally:
+        return path
 
 
-def singleLinkMode(argparser: Argparser) -> dict:
+def getRequirements(url: str, sourcetype: str) -> list:
     """Runs the single-link main function."""
-    if argparser.args.sourcetype == "html":
-        parser = build(argparser.args.source)
-        for article in parser.articles:
-            a = Article(article.url)
-            try:
+    result = str()
+    results = list()
+    try:
+        if sourcetype == "html":
+            parser = build(url)
+            for article in parser.articles:
+                a = Article(article.url)
                 a.download()
                 a.parse()
                 doc = Document(a.html)
-                print(doc.summary())
-                extractRequirements(doc.summary())
-            except Exception as e:
-                logging.exception(e)
-    elif argparser.args.sourcetype == "text":
-        bytesText = simpleGet(argparser.args.source)
-        extractRequirements(bytesText.decode("utf-8"))
+                # print(doc.summary())
+                results = extractRequirements(doc.summary())
+        elif sourcetype == "text":
+            bytesText = simpleGet(url)
+            results = extractRequirements(bytesText.decode("utf-8"))
+    except Exception as e:
+        logging.exception(e)
+    finally:
+        result = "".join(results + "\n")
+        return result
 
 
-def summarizeLinkToAudio(argparser: Argparser) -> None:
+def summarizeLinkToAudio(url, summary) -> str:
     """Summarizes the text inside a given url into audio."""
+    result = str()
     try:
-        article = Article(argparser.args.source)
+        article = Article(url)
         article.download()
         article.parse()
-        if argparser.args.summary == "newspaper":
+        if summary == "newspaper":
             article.nlp()
-            textToAudio(article.summary)
-        elif argparser.args.summary == "none":
-            textToAudio(article.text)
-        elif argparser.args.summary == "bart":
-            textToAudio(summarizeText(article.text))
+            result = article.summary
+        elif summary == "none":
+            result = article.text
+        elif summary == "bart":
+            result = article.text
         else:
-            print("invalid option for summry type.")
+            print("invalid option for summary type.")
+            result = None
     except Exception as e:
+        result = None
         logging.exception(e)
+    finally:
+        return result
 
 
-def summarizeLinksToAudio(argparser: Argparser) -> None:
+def summarizeLinksToAudio(url, summary) -> None:
     """Summarize a list of urls into audio files."""
-    config = Config()
-    configNews(config)
-    urls = getURLS(argparser.args.source)
-    for url in urls:
-        summarizeLinkToAudio(url)
+    results = list()
+    result = str()
+    try:
+        config = Config()
+        configNews(config)
+        urls = getURLS(url, summary)
+        for url in urls:
+            results.append(summarizeLinkToAudio(url))
+    except Exception as e:
+        logging.exception(e)
+    finally:
+        result = "".join(results)
+        return result
 
 
-def searchWikipedia(argparser: Argparser) -> str:
+def searchWikipedia(search_term: str) -> str:
     """Search wikipedia for a string and return the url.
 
     reference: https://www.mediawiki.org/wiki/API:Opensearch
     """
-    searchParmas = {
-        "action": "opensearch",
-        "namespace": "0",
-        "search": argparser.args.search,
-        "limit": "10",
-        "format": "json",
+    result = str()
+    try:
+        searchParmas = {
+            "action": "opensearch",
+            "namespace": "0",
+            "search": search_term,
+            "limit": "10",
+            "format": "json",
+        }
+        res = getWithParams(os.environ["WIKI_SEARCH_URL"], searchParmas)
+        # FIXME-handle wiki redirects/disambiguations
+        # argparser.args.source = res[3][0]
+        print(res)
+    except Exception as e:
+        logging.exception(e)
+    finally:
+        return result
+
+
+def getAudioFromFile(audio_path: str) -> str:
+    """Returns the contents of a file in binary format"""
+    with open(audio_path, "rb") as audio:
+        return audio.read()
+
+
+app = FastAPI()
+
+
+@app.get("/tika")
+async def pdf_to_audio_ep(url: str):
+    """turns a pdf into an audiofile"""
+    audio_path = pdfToVoice()
+    return {
+        "Content-Type": "application/json",
+        "isOK": True if audio_path != "" else False,
+        "audio": getAudioFromFile(audio_path) if audio_path != "" else "",
     }
-    res = getWithParams(WIKIPEDIA_SEARCH_URL, searchParmas)
-    print(res)
-    argparser.args.source = res[3][0]
-    summarizeLinkToAudio(argparser)
-
-
-def main() -> None:
-    argparser = Argparser()
-    if argparser.args.singlelink:
-        summarizeLinkToAudio(argparser)
-    elif argparser.args.multilink:
-        summarizeLinksToAudio(argparser)
-    elif argparser.args.pdftomp3:
-        pdfToVoice(argparser)
-    elif argparser.args.search:
-        searchWikipedia(argparser)
-    else:
-        pass
 
 
-if __name__ == "__main__":
-    main()
+@app.get("/reqs")
+async def extract_reqs_ep(url: str, sourcetype: str = "html"):
+    """extracts the requirements from a given url"""
+    result = getRequirements()
+    return {
+        "Content-Type": "application/json",
+        "isOK": True if result != "" else False,
+        "reqs": result,
+    }
+
+
+@app.get("/wiki")
+async def wiki_search_ep(term: str, audio: bool = False):
+    """search and summarizes from wikipedia"""
+    text = searchWikipedia(term)
+    if audio:
+        audio_path = textToAudio(text)
+        return {
+            "Content-Type": "application/json",
+            "isOK": (True if audio_path != "" else False)
+            and (True if text != "" else False),
+            "audio": getAudioFromFile(audio_path) if audio_path != "" else "",
+            "text": text,
+        }
+    else:
+        return {
+            "Content-Type": "application/json",
+            "isOK": True if text != "" else False,
+            "audio": "",
+            "text": text,
+        }
+
+
+@app.get("/summ")
+async def summarize_ep(url: str, summary: str = "none", audio: bool = False):
+    """summarize and turn the summary into audio"""
+    text = summarizeLinkToAudio(url, summary)
+    if audio:
+        audio_path = textToAudio(text)
+        return {
+            "Content-Type": "application/json",
+            "isOK": (True if audio_path != "" else False)
+            and (True if text != "" else False),
+            "audio": getAudioFromFile(audio_path) if audio_path != "" else "",
+            "text": text,
+        }
+    else:
+        return {
+            "Content-Type": "application/json",
+            "isOK": True if text != "" else False,
+            "audio": "",
+            "text": text,
+        }
+
+
+@app.get("/mila")
+async def mila_ep(url: str, summary: str = "newspaper", audio: bool = False):
+    """extract all the urls and then summarize and turn into audio"""
+    text = summarizeLinksToAudio(url, summary)
+    if audio:
+        audio_path = textToAudio(text)
+        return {
+            "Content-Type": "application/json",
+            "isOK": (True if audio_path != "" else False)
+            and (True if text != "" else False),
+            "audio": getAudioFromFile(audio_path) if audio_path != "" else "",
+            "text": text,
+        }
+    else:
+        return {
+            "Content-Type": "application/json",
+            "isOK": True if text != "" else False,
+            "audio": "",
+            "text": text,
+        }