#!/usr/bin/env python3
"""Sniffs different file types in a given URL."""

import argparse
import re
import typing
import contextlib
import requests  # type:ignore
import bs4  # type:ignore


def log_error(error):
    """A logger wrapper."""
    print(error)


def simple_get(url) -> typing.Optional[typing.ByteString]:
    """A simple get wrapper."""
    try:
        with contextlib.closing(
            requests.get(url, stream=True, timeout=10)
        ) as resp:
            if is_good_response(resp):
                return resp.content
        return None
    except requests.exceptions.RequestException as error:
        log_error(f"Error during requests to {url} : {error}")
        return None


def is_good_response(resp):
    """Checks that the response is a 200 with an HTML content type."""
    content_type = resp.headers.get("Content-Type", "").lower()
    return resp.status_code == 200 and "html" in content_type


# pylint: disable=too-few-public-methods
class Argparser:
    """Command-line argument parser wrapper."""

    def __init__(self):
        parser = argparse.ArgumentParser()
        parser.add_argument("--src", type=str, help="source URL to sniff")
        parser.add_argument("--str", type=str, help="what string to look for")
        parser.add_argument(
            "--vid", action="store_true", help="sniff videos", default=False
        )
        parser.add_argument(
            "--img", action="store_true", help="sniff images", default=False
        )
        parser.add_argument(
            "--url", action="store_true", help="sniff URLs", default=False
        )
        self.args = parser.parse_args()


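# Common video file extensions; the --vid pass greps the page dump for these.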
VID_FMTS = [
    "webm",
    "mpg",
    "mp2",
    "mpeg",
    "mpe",
    "mpv",
    "ogg",
    "mp4",
    "m4p",
    "m4v",
    "flv",
    "avi",
    "wmv",
    "mkv",
    "svi",
]


def image_finder(url: str) -> None:
    """Sniffs images: prints the src of every <img> tag on the page."""
    response = requests.get(url, timeout=10, allow_redirects=True)
    if not response.content:
        return
    soup = bs4.BeautifulSoup(response.content, "lxml")
    search_results = soup.find_all("img")
    for result in search_results:
        src = result.get("src")
        if src:
            print(src)


def main() -> None:
    """Entry point."""
    argparser = Argparser()
    if argparser.args.img:
        image_finder(argparser.args.src)
    raw_ml = simple_get(argparser.args.src)
    if raw_ml is None:
        return
    ml_str = repr(bs4.BeautifulSoup(raw_ml, "lxml"))
    with open("/tmp/riecher", "w", encoding="utf-8") as tmp:
        tmp.write(ml_str)
    # read the dump once so both the --vid and --url passes see every line
    with open("/tmp/riecher", "r", encoding="utf-8") as tmp:
        lines = tmp.readlines()
    if argparser.args.src and argparser.args.vid:
        for line in lines:
            for elem in VID_FMTS:
                if "." + elem in line:
                    print(line)
    if argparser.args.url:
        dump_list = []
        for line in lines:
            dump_list += re.findall(
                r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|"
                r"(?:%[0-9a-fA-F][0-9a-fA-F]))+",
                line,
            )
        for elem in dump_list:
            print(elem)


if __name__ == "__main__":
    main()