diff options
Diffstat (limited to '')
| -rwxr-xr-x | db/mongo/scraper.js | 74 | ||||
| -rw-r--r-- | db/postgres/security_rss_keyword.sql | 31 | ||||
| -rw-r--r-- | db/postgres/strip_ascii_escape.pgsql | 23 | 
3 files changed, 128 insertions, 0 deletions
diff --git a/db/mongo/scraper.js b/db/mongo/scraper.js
new file mode 100755
index 0000000..d28330f
--- /dev/null
+++ b/db/mongo/scraper.js
@@ -0,0 +1,74 @@
+#!/usr/bin/env node
+"use strict";
+
+const x = require("x-ray")();
+
+/**
+ * Fetch `url` with browser-like request headers and return the body as text.
+ *
+ * @param {string} url - Absolute URL of the page to download.
+ * @returns {Promise<string>} The response body decoded as UTF-8.
+ * @throws {Error} If the response status is outside the 2xx range.
+ */
+async function getHTML(url) {
+  const response = await fetch(url, {
+    credentials: "include",
+    headers: {
+      "User-Agent":
+        "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.5666.197 Safari/537.36",
+      Accept:
+        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+      "Accept-Language": "en-US,en;q=0.5",
+      "Upgrade-Insecure-Requests": "1",
+      "Sec-Fetch-Dest": "document",
+      "Sec-Fetch-Mode": "navigate",
+      "Sec-Fetch-Site": "none",
+      "Sec-Fetch-User": "?1",
+      "Sec-GPC": "1",
+      "sec-ch-ua-platform": '"Windows"',
+      "sec-ch-ua":
+        '"Google Chrome";v="113", "Chromium";v="113", "Not=A?Brand";v="24"',
+      "sec-ch-ua-mobile": "?0",
+      Pragma: "no-cache",
+      "Cache-Control": "no-cache",
+    },
+    method: "GET",
+    mode: "cors",
+  });
+
+  // Fail loudly instead of returning undefined, which the caller would
+  // otherwise hand to x-ray as if it were HTML.
+  if (!response.ok) {
+    throw new Error(
+      `GET ${url} failed: ${response.status} ${response.statusText}`
+    );
+  }
+
+  return response.text();
+}
+
+/**
+ * Download the hard-coded manga page, run the `a.chapter-name.text-nowrap`
+ * selector over it with x-ray, and print the result.
+ */
+async function main() {
+  const resultHTML = await getHTML("https://chapmanganato.com/manga-kd987838");
+
+  x(
+    resultHTML,
+    "a.chapter-name.text-nowrap"
+  )(function (err, lastChapter) {
+    if (err) {
+      console.error(err);
+      process.exitCode = 1;
+      return;
+    }
+
+    console.log(lastChapter);
+  });
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/db/postgres/security_rss_keyword.sql b/db/postgres/security_rss_keyword.sql
new file mode 100644
index 0000000..6026246
--- /dev/null
+++ b/db/postgres/security_rss_keyword.sql
@@ -0,0 +1,31 @@
+-- Rows copied out of public.logs whose text matches one of the watched keywords.
+create table if not exists security_rss_keyword_store (
+    id serial primary key not null,
+    logdate timestamp without time zone default now(),
+    nick varchar(128) not null,
+    log varchar(1024) not null,
+    channel varchar(256) not null
+);
+
+-- Trigger function: when the new row's log text contains nvidia, intel or dell
+-- (case-sensitive LIKE), copy nick/log/channel into security_rss_keyword_store.
+create or replace function security_rss_trigger_function()
+returns trigger
+language plpgsql
+as $$
+begin
+    if NEW.log like any(array['%nvidia%','%intel%','%dell%']) then
+        insert into security_rss_keyword_store(nick,log,channel)
+        values (NEW.nick,NEW.log,NEW.channel);
+    end if;
+
+    -- A trigger function must return; the value is ignored for AFTER row triggers.
+    return null;
+end;
+$$;
+
+create or replace trigger security_rss_trigger
+after insert or update
+on public.logs
+for each row
+execute function security_rss_trigger_function();
diff --git a/db/postgres/strip_ascii_escape.pgsql b/db/postgres/strip_ascii_escape.pgsql
new file mode 100644
index 0000000..00714f6
--- /dev/null
+++ b/db/postgres/strip_ascii_escape.pgsql
@@ -0,0 +1,23 @@
+-- Trigger function: remove terminal escape sequences and control characters
+-- from NEW.log before the row is stored.
+create or replace function strip_ascii_escape() returns trigger
+language plpgsql as $$
+    begin
+        -- ESC [ <0x30-0x3f>* <0x20-0x2f>* <0x40-0x7e> (CSI-style sequences)
+        NEW.log = regexp_replace(NEW.log,'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]','','g');
+        -- ESC P / X / ^ / _ and everything up to the next ESC \ terminator
+        NEW.log = regexp_replace(NEW.log,'\x1b[PX^_].*?\x1b\\','','g');
+        -- ESC ] ... terminated by BEL or ESC \
+        NEW.log = regexp_replace(NEW.log,'\x1b\][^\a]*(?:\a|\x1b\\)','','g');
+        -- any remaining ESC followed by one of [ ] A-Z \ ^ _ @
+        NEW.log = regexp_replace(NEW.log,'\x1b[\[\]A-Z\\^_@]','','g');
+        -- leftover characters in 0x00-0x1f, 0x7f-0x9f and 0xad
+        NEW.log = regexp_replace(NEW.log,'[\x00-\x1f\x7f-\x9f\xad]+','','g');
+
+        return NEW;
+    end;
+$$;
+
+create trigger strip_ascii_escape
+before insert on public.logs
+for each row execute function strip_ascii_escape();
