1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
#!/usr/bin/python3
import argparse
import json
import code
import signal
import sys
import os
import requests
from bs4 import BeautifulSoup
import subprocess
import re
from pathlib import Path
def SigHandler_SIGINT(signum, frame):
    """Handle SIGINT: emit a newline and terminate with exit status 0.

    Args:
        signum: Signal number passed in by the ``signal`` module (unused).
        frame: Interrupted stack frame passed in by the ``signal`` module
            (unused).

    Raises:
        SystemExit: Always, with code 0.
    """
    # Finish the current terminal line before exiting.
    print()
    raise SystemExit(0)
class Argparser:
    """Command-line interface of the script.

    Instantiating the class parses ``sys.argv`` immediately; the resulting
    namespace is stored on ``self.args``.
    """

    def __init__(self):
        cli = argparse.ArgumentParser()

        # Options that accept one or more string values.
        # NOTE(review): --name carries the same help text as --url; confirm
        # the intended wording.
        for option in ("--url", "--name"):
            cli.add_argument(option, nargs="+", type=str,
                             help="url to scrape")

        # Plain on/off switches, all off by default.
        switches = (
            ("--dbg", "debug"),
            ("--demon", "run as daemon"),
            ("--manga", "run manga scraper"),
            ("--cb", "run cb scraper"),
        )
        for flag, description in switches:
            cli.add_argument(flag, default=False, action="store_true",
                             help=description)

        cli.add_argument("--slumber", default=60, type=int,
                         help="how long the demon sleeps")

        self.args = cli.parse_args()
def mrg(url):
    """Report whether the page at *url* shows a "Last Broadcast:" value in minutes.

    The page is fetched and parsed with lxml.  Every ``<div class="label">``
    whose text is exactly ``"Last Broadcast:"`` is inspected, and the value is
    read from the node two siblings after that label.  When several labels
    match, the last one that yields readable text wins.

    Args:
        url: Address of the page to fetch.

    Returns:
        True if the value text contains the substring ``"minutes"``; False
        otherwise, including when no label or no value text is found.

    Raises:
        requests.RequestException: If the HTTP request itself fails.
    """
    requests.packages.urllib3.disable_warnings()
    # NOTE(review): verify=False skips TLS certificate checks (hence the
    # silenced urllib3 warnings above) -- confirm this is intentional.
    resp = requests.get(url, verify=False)
    soup = BeautifulSoup(resp.text, "lxml")

    up_time = ""
    for elem in soup.find_all("div", class_="label"):
        if elem.string != "Last Broadcast:":
            continue
        # The value sits two siblings further on (presumably a whitespace
        # node lies in between -- confirm against the page markup).
        value = elem.next_sibling
        if value is not None:
            value = value.next_sibling
        # Either sibling may be absent and ``.string`` is None for a tag with
        # several children; guard so a layout change yields False rather
        # than an AttributeError.
        text = getattr(value, "string", None)
        if text is not None:
            up_time = text
    return "minutes" in up_time
def run_cb_scrape():
    """Check the configured pages and announce each one ``mrg`` accepts.

    A JSON object is loaded from a fixed path and its entries ``"1"``,
    ``"2"`` and ``"5"`` are passed to ``mrg`` in that order.  For every entry
    where ``mrg`` returns True, a short tag is printed (without a newline)
    and the matching sound file under ``~/scripts/mila/`` is handed to
    ``vocalize``.

    Raises:
        OSError: If the data file cannot be opened.
        KeyError: If one of the expected keys is missing from the data file.
    """
    # Context manager so the file handle is closed deterministically.
    with open("/home/bloodstalker/extra/kaminokumo/data.json",
              encoding="utf-8") as data_file:
        url = json.load(data_file)

    sound_dir = os.path.expanduser("~") + "/scripts/mila/"
    # (key in data.json, tag printed on stdout, sound file to play)
    targets = (
        ("1", "mg ", "mgup.ogg"),
        ("2", "obk ", "obkup.ogg"),
        ("5", "ls ", "lisaup.ogg"),
    )
    for key, tag, sound in targets:
        if mrg(url[key]):
            print(tag, end="")
            vocalize(sound_dir + sound)
def manga_scrape():
    """Print the first listed chapter for every title in ``manga.json``.

    ``manga.json`` is read from the directory that contains this script
    (symlinks to the script are followed) and is expected to hold a JSON
    object mapping a display name to a URL.  Each URL is fetched and parsed
    with lxml; the ``<a class="chapter-name text-nowrap">`` links are scanned
    in document order and the first ``Chapter <number>`` text found is
    reported (presumably the newest chapter -- confirm against the site).

    One ``name-->Chapter N`` line per title is written to stdout after all
    pages have been processed.  Titles for which no chapter text is found are
    reported on stderr and left out of the stdout listing.

    Raises:
        OSError: If ``manga.json`` cannot be opened.
        requests.RequestException: If an HTTP request fails.
    """
    # resolve() follows symlinks and yields an absolute path, so the data
    # file is also found when the script is started through a relative
    # symlink or by bare file name (where dirname() would be empty and the
    # lookup would end up at "/manga.json").
    script_dir = Path(sys.argv[0]).resolve().parent
    with open(script_dir / "manga.json", encoding="utf-8") as manga_file:
        urls = json.load(manga_file)

    requests.packages.urllib3.disable_warnings()
    # "Chapter 12" with an optional fractional part such as "Chapter 12.5".
    chapter_re = re.compile(r"Chapter [0-9]*(?:\.[0-9]*)?")

    lines = []
    for name, url in urls.items():
        # NOTE(review): verify=False skips TLS certificate checks -- confirm
        # this is intentional.
        resp = requests.get(url, verify=False)
        soup = BeautifulSoup(resp.text, "lxml")

        latest = None
        for link in soup.find_all("a", class_="chapter-name text-nowrap"):
            match = chapter_re.search(link.text)
            if match:
                latest = match.group(0)
                break

        if latest is None:
            # Keep going: one page with unexpected markup must not discard
            # the results already collected for the other titles.
            print(name + ": no chapter found", file=sys.stderr)
            continue
        lines.append(name + "-->" + latest + "\n")

    print("".join(lines), end="")
def vocalize(sound):
    """Run ``~/scripts/voice.sh`` with *sound* as its single argument.

    The call blocks until the script has finished; its exit status is
    discarded.

    Args:
        sound: Value passed through unchanged to the script.
    """
    home = os.path.expanduser("~")
    command = [f"{home}/scripts/voice.sh", sound]
    subprocess.call(command)
###############################################################################
def premain(argparser):
    """Install the SIGINT handler, then run the scraper selected on the CLI.

    ``--cb`` takes precedence over ``--manga``; when neither flag is set
    nothing further happens.

    Args:
        argparser: Object whose ``args`` attribute carries the parsed
            ``cb`` and ``manga`` flags.
    """
    signal.signal(signal.SIGINT, SigHandler_SIGINT)

    options = argparser.args
    if options.cb:
        run_cb_scrape()
        return
    if options.manga:
        manga_scrape()
def main():
    """Parse the command line and run the selected scraper.

    Without ``--dbg`` the scraper runs directly and any exception
    propagates.  With ``--dbg`` an exception raised by the scraper is
    printed (class docstring, then message) and an interactive console is
    opened whose namespace holds the module globals plus this function's
    locals, including the exception as ``e``.
    """
    argparser = Argparser()
    if not argparser.args.dbg:
        premain(argparser)
        return

    try:
        premain(argparser)
    except Exception as e:
        print(e.__doc__)
        # Python 3 exceptions have no ``.message`` attribute; reading it
        # raised AttributeError here and the REPL below was never reached.
        message = str(e)
        if message:
            print(message)
        # Snapshot taken inside the handler so ``e`` is still bound.
        variables = globals().copy()
        variables.update(locals())
        shell = code.InteractiveConsole(variables)
        shell.interact(banner="DEBUG REPL")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|