diff options
Diffstat (limited to 'db')
-rwxr-xr-x | db/mongo/scraper.js | 56 | ||||
-rw-r--r-- | db/postgres/security_rss_keyword.sql | 24 | ||||
-rw-r--r-- | db/postgres/strip_ascii_escape.pgsql | 16 |
3 files changed, 96 insertions, 0 deletions
diff --git a/db/mongo/scraper.js b/db/mongo/scraper.js
new file mode 100755
index 0000000..d28330f
--- /dev/null
+++ b/db/mongo/scraper.js
@@ -0,0 +1,75 @@
+#!/usr/bin/env node
+"use strict";
+
+const x = require("x-ray")();
+
+/**
+ * Download a page while sending browser-like request headers.
+ *
+ * @param {string} url - Address of the page to fetch.
+ * @returns {Promise<string>} The response body decoded as text.
+ * @throws {Error} If the server answers with a non-OK status.
+ */
+async function getHTML(url) {
+  const response = await fetch(url, {
+    credentials: "include",
+    headers: {
+      "User-Agent":
+        "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.5666.197 Safari/537.36",
+      Accept:
+        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+      "Accept-Language": "en-US,en;q=0.5",
+      "Upgrade-Insecure-Requests": "1",
+      "Sec-Fetch-Dest": "document",
+      "Sec-Fetch-Mode": "navigate",
+      "Sec-Fetch-Site": "none",
+      "Sec-Fetch-User": "?1",
+      "Sec-GPC": "1",
+      "sec-ch-ua-platform": '"Windows"',
+      "sec-ch-ua":
+        '"Google Chrome";v="113", "Chromium";v="113", "Not=A?Brand";v="24"',
+      "sec-ch-ua-mobile": "?0",
+      Pragma: "no-cache",
+      "Cache-Control": "no-cache",
+    },
+    method: "GET",
+    mode: "cors",
+  });
+
+  // Fail loudly: returning undefined here would only surface later as a
+  // confusing error inside the HTML parser.
+  if (!response.ok) {
+    throw new Error(
+      `GET ${url} failed: ${response.status} ${response.statusText}`
+    );
+  }
+
+  return response.text();
+}
+
+/**
+ * Fetch the manga page and print what x-ray extracts for the chapter link
+ * selector.
+ *
+ * @returns {Promise<*>} The value x-ray produced for the selector.
+ * @throws {Error} If the download or the x-ray extraction fails.
+ */
+async function main() {
+  const resultHTML = await getHTML("https://chapmanganato.com/manga-kd987838");
+
+  // x-ray reports its result through a Node-style callback; wrap it so that
+  // extraction errors reject instead of being silently dropped.
+  const lastChapter = await new Promise((resolve, reject) => {
+    x(
+      resultHTML,
+      "a.chapter-name.text-nowrap"
+    )((err, chapter) => (err ? reject(err) : resolve(chapter)));
+  });
+
+  console.log(lastChapter);
+  return lastChapter;
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/db/postgres/security_rss_keyword.sql b/db/postgres/security_rss_keyword.sql
new file mode 100644
index 0000000..6026246
--- /dev/null
+++ b/db/postgres/security_rss_keyword.sql
@@ -0,0 +1,31 @@
+create table if not exists security_rss_keyword_store (
+  id serial primary key not null,
+  logdate timestamp without time zone default now(),
+  nick varchar(128) not null,
+  log varchar(1024) not null,
+  channel varchar(256) not null
+);
+
+-- Copies each inserted/updated row of public.logs whose log text contains one
+-- of the watched keywords into security_rss_keyword_store.
+-- Note: "like" is case-sensitive, so e.g. 'NVIDIA' does not match.
+create or replace function security_rss_trigger_function()
+returns trigger
+language plpgsql
+as $$
+begin
+  if NEW.log like any(array['%nvidia%','%intel%','%dell%']) then
+    insert into security_rss_keyword_store(nick,log,channel)
+    values (NEW.nick,NEW.log,NEW.channel);
+  end if;
+
+  -- A trigger function must return; the value is ignored for AFTER triggers.
+  return null;
+end;
+$$;
+
+create or replace trigger security_rss_trigger
+after insert or update
+on public.logs
+for each row
+execute function security_rss_trigger_function();
diff --git a/db/postgres/strip_ascii_escape.pgsql b/db/postgres/strip_ascii_escape.pgsql
new file mode 100644
index 0000000..00714f6
--- /dev/null
+++ b/db/postgres/strip_ascii_escape.pgsql
@@ -0,0 +1,24 @@
+-- Removes terminal escape sequences and control characters from the log
+-- column of each row before it is inserted into public.logs.
+create or replace function strip_ascii_escape() returns trigger
+language plpgsql as $$
+  begin
+    -- ESC [ <bytes 0x30-0x3f>* <bytes 0x20-0x2f>* <one byte 0x40-0x7e>
+    NEW.log = regexp_replace(NEW.log,'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]','','g');
+    -- ESC followed by P, X, ^ or _, up to the next ESC \
+    NEW.log = regexp_replace(NEW.log,'\x1b[PX^_].*?\x1b\\','','g');
+    -- ESC ] up to the next BEL (\a) or ESC \
+    NEW.log = regexp_replace(NEW.log,'\x1b\][^\a]*(?:\a|\x1b\\)','','g');
+    -- Any remaining two-character escape: ESC followed by [ ] A-Z \ ^ _ or @
+    NEW.log = regexp_replace(NEW.log,'\x1b[\[\]A-Z\\^_@]','','g');
+    -- Leftover characters 0x00-0x1f (this includes tab and newline),
+    -- 0x7f-0x9f and 0xad
+    NEW.log = regexp_replace(NEW.log,'[\x00-\x1f\x7f-\x9f\xad]+','','g');
+
+    return NEW;
+  end;
+$$;
+
+create or replace trigger strip_ascii_escape
+before insert on public.logs
+for each row execute function strip_ascii_escape();