#!/usr/bin/env python
"""Sniffs different file types in a given URL."""

import argparse
import contextlib
import re
import typing

import bs4  # type:ignore
import requests  # type:ignore

# File extensions (without the leading dot) treated as video formats.
VID_FMTS = [
    "webm",
    "mpg",
    "mp2",
    "mpeg",
    "mpe",
    "mpv",
    "ogg",
    "mp4",
    "m4p",
    "m4v",
    "flv",
    "avi",
    "wmv",
    "mkv",
    "svi",
]

# Path where main() dumps the parsed markup before scanning it.
_DUMP_PATH = "/tmp/riecher"

# Matches http(s) URLs. Compiled once, and written as raw strings so that
# ``\(`` is a regex escape rather than an invalid string escape.
_URL_PATTERN = re.compile(
    r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|"
    r"(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)


def log_error(error):
    """Report *error* by printing it to standard output."""
    print(error)


def simple_get(url) -> typing.Optional[bytes]:
    """Fetch *url* and return the response body if it looks like HTML.

    Args:
        url: The address to request.

    Returns:
        The raw response body when ``is_good_response`` accepts the
        response; ``None`` when it does not or when the request raises a
        ``requests`` exception (which is logged via ``log_error``).
    """
    try:
        with contextlib.closing(
            requests.get(url, stream=True, timeout=10)
        ) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except requests.exceptions.RequestException as error:
        # Plain f-string interpolation: the previous mix of an f-string
        # with ``.format`` logged the literal text "0 : 1".
        log_error(f"Error during requests to {url} : {error}")
        return None


def is_good_response(resp):
    """Return True if *resp* has status 200 and an HTML content type.

    A response without a ``Content-Type`` header is treated as not good
    instead of raising ``KeyError``.
    """
    content_type = resp.headers.get("Content-Type", "").lower()
    return resp.status_code == 200 and "html" in content_type


# pylint: disable=too-few-public-methods
class Argparser:
    """Parses the command-line options and exposes them as ``args``."""

    def __init__(self, argv=None):
        """Build the parser and parse *argv*.

        Args:
            argv: Optional list of argument strings. When ``None`` (the
                default) argparse reads the process command line.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--src", type=str, help="url")
        parser.add_argument("--str", type=str, help="what string to look for")
        parser.add_argument(
            "--vid", action="store_true", help="video", default=False
        )
        parser.add_argument(
            "--img", action="store_true", help="sniff images", default=False
        )
        parser.add_argument(
            "--url", action="store_true", help="url", default=False
        )
        self.args = parser.parse_args(argv)


def image_finder(url: str) -> None:
    """Print the ``src`` attribute of every ``<img>`` tag found at *url*.

    Request failures are logged via ``log_error`` rather than propagated,
    and ``<img>`` tags without a ``src`` attribute are skipped.
    """
    try:
        response = requests.get(url, timeout=10, allow_redirects=True)
    except requests.exceptions.RequestException as error:
        log_error(f"Error during requests to {url} : {error}")
        return
    if not response.content:
        return
    soup = bs4.BeautifulSoup(response.content, "lxml")
    for tag in soup.find_all("img"):
        src = tag.get("src")
        if src is not None:
            print(src)


def main() -> None:
    """Entry point: fetch ``--src`` and print what the flags ask for."""
    args = Argparser().args
    if not args.src:
        # Nothing can be fetched without a source URL.
        log_error("No source URL given; use --src <url>.")
        return

    if args.img:
        image_finder(args.src)

    raw_ml = simple_get(args.src)
    if raw_ml is None:
        # simple_get returns None for failed requests and non-HTML replies.
        log_error(f"Could not fetch HTML from {args.src}")
        return

    ml_str = repr(bs4.BeautifulSoup(raw_ml, "lxml"))
    # The markup is still written to the dump file, as before, and read
    # back so that line splitting matches the file-based behaviour.
    with open(_DUMP_PATH, "w", encoding="utf-8") as dump:
        dump.write(ml_str)
    with open(_DUMP_PATH, "r", encoding="utf-8") as dump:
        # Materialise the lines so --vid and --url can both scan them;
        # a shared file iterator would be exhausted by the first pass.
        lines = dump.readlines()

    if args.vid:
        for line in lines:
            # Print each line once even when several extensions match
            # (".mpeg" also contains ".mpe").
            if any("." + fmt in line for fmt in VID_FMTS):
                print(line)

    if args.url:
        for line in lines:
            for found in _URL_PATTERN.findall(line):
                print(found)


if __name__ == "__main__":
    main()