#!/usr/bin/env python
"""Sniffs different file types in a given URL."""
import argparse
import re
import typing
import contextlib
import requests # type:ignore
import bs4 # type:ignore
def log_error(error):
    """Report *error* by printing its string form to standard output."""
    message = str(error)
    print(message)
def simple_get(url) -> typing.Optional[bytes]:
    """Fetch *url* and return the response body if it looks like HTML.

    Args:
        url: Address handed straight to ``requests.get``.

    Returns:
        The raw response body when ``is_good_response`` accepts the
        response. ``None`` when the response is rejected, or when the
        request raises a ``requests`` exception (reported via
        ``log_error``).
    """
    try:
        # closing() releases the streamed connection even if reading the
        # body fails part-way through.
        with contextlib.closing(
            requests.get(url, stream=True, timeout=10)
        ) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except requests.exceptions.RequestException as error:
        # A plain f-string: mixing an f-string with .format() made the
        # placeholders evaluate to the literals 0 and 1.
        log_error(f"Error during requests to {url} : {error}")
        return None
def is_good_response(resp):
    """Return True when *resp* is a 200 response whose content type is HTML.

    Args:
        resp: Object exposing ``status_code`` and a mapping-like
            ``headers`` attribute (as a ``requests`` response does).

    Returns:
        ``True`` if the status code is 200 and the ``Content-Type`` header
        contains ``"html"`` (case-insensitively); ``False`` otherwise,
        including when the header is missing.
    """
    # .get() instead of indexing: a response without a Content-Type header
    # is simply "not good" rather than a KeyError.
    content_type = resp.headers.get("Content-Type")
    if content_type is None:
        return False
    return resp.status_code == 200 and "html" in content_type.lower()
# pylint: disable=too-few-public-methods
class Argparser:
    """Parses the command line and exposes the result as ``self.args``."""

    def __init__(self):
        cli = argparse.ArgumentParser()

        # Options that take a string value.
        cli.add_argument("--src", type=str, help="url")
        cli.add_argument("--str", type=str, help="what string to look for")

        # Boolean switches, all off unless given on the command line.
        switches = (
            ("--vid", "video"),
            ("--img", "sniff images"),
            ("--url", "url"),
        )
        for flag, description in switches:
            cli.add_argument(
                flag, action="store_true", help=description, default=False
            )

        self.args = cli.parse_args()
# File extensions (without the leading dot) that main() looks for when the
# --vid switch is given.
VID_FMTS = [
    "webm", "mpg", "mp2", "mpeg", "mpe",
    "mpv", "ogg", "mp4", "m4p", "m4v",
    "flv", "avi", "wmv", "mkv", "svi",
]
def image_finder(url: str) -> None:
    """Print the ``src`` attribute of every ``<img>`` tag found at *url*.

    Args:
        url: Page to download; redirects are followed.

    Returns:
        Always ``None``. Request failures are reported through
        ``log_error`` instead of propagating, matching ``simple_get``.
    """
    try:
        response = requests.get(url, timeout=10, allow_redirects=True)
    except requests.exceptions.RequestException as error:
        log_error(f"Error during requests to {url} : {error}")
        return None

    # response.content is bytes, never None; an empty body has no tags.
    if not response.content:
        return None

    soup = bs4.BeautifulSoup(response.content, "lxml")
    for tag in soup.find_all("img"):
        # Tag.get() returns None for a missing attribute, whereas indexing
        # raises KeyError on the first <img> without a src.
        src = tag.get("src")
        if src is not None:
            print(src)
    return None
# Raw strings throughout: the pattern contains "\(" and "\)", which are
# invalid escape sequences in an ordinary string literal.
_URL_PATTERN = re.compile(
    r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|"
    r"(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)


def _video_lines(markup: str) -> typing.List[str]:
    """Return the lines of *markup* that mention a ``VID_FMTS`` extension."""
    suffixes = tuple("." + fmt for fmt in VID_FMTS)
    # any() reports a line once even when several extensions match it
    # (".mpe" is a substring of ".mpeg", for instance).
    return [
        line
        for line in markup.splitlines()
        if any(suffix in line for suffix in suffixes)
    ]


def _find_urls(markup: str) -> typing.List[str]:
    """Return every http(s) URL matched in *markup*, in document order."""
    # The pattern has no capturing groups and cannot match whitespace, so
    # scanning the whole text finds the same URLs as scanning line by line.
    return _URL_PATTERN.findall(markup)


def main() -> None:
    """Entry point: sniff images, video references and URLs from ``--src``.

    ``--img`` prints image sources via ``image_finder``. ``--vid`` prints
    each line of the parsed page that mentions a known video extension.
    ``--url`` prints every http(s) URL found in the parsed page. Problems
    (no ``--src``, page could not be retrieved as HTML) are reported with
    ``log_error`` and end the run.
    """
    args = Argparser().args

    # Validate the URL before anything tries to fetch it.
    if not args.src:
        log_error("No source URL given; use --src.")
        return

    if args.img:
        image_finder(args.src)

    # Only download the page when one of the scans below will use it.
    if not (args.vid or args.url):
        return

    raw_ml = simple_get(args.src)
    if raw_ml is None:
        log_error(f"No HTML content retrieved from {args.src}")
        return

    # The markup is scanned in memory, so both scans see the full text
    # (a shared file iterator would be exhausted by the first one).
    markup = str(bs4.BeautifulSoup(raw_ml, "lxml"))

    if args.vid:
        for line in _video_lines(markup):
            print(line)

    if args.url:
        for found in _find_urls(markup):
            print(found)
# Run the sniffer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()