# Extracted from commit 6ab85750ae2d679a30be59a4ed7e78da52351234
# ("a crawler", by terminaldweller, Tue, 3 May 2022 21:46:32 +0430),
# which created crawler.py.
import scrapy


class QuotesSpider(scrapy.Spider):
    """Scrape the quotes listed under the "humor" tag on quotes.toscrape.com.

    The crawl starts from the single URL in ``start_urls`` and walks the
    paginated listing by following each page's "next" link.
    """

    name = "quotes"
    start_urls = ["https://quotes.toscrape.com/tag/humor"]

    def parse(self, response):
        """Extract every quote on one listing page and queue the next page.

        Args:
            response: The fetched listing page to extract quotes from.

        Yields:
            One dict per ``div.quote`` element with the keys ``"author"``
            and ``"text"``, followed by a request for the next listing page
            when the current page has a ``li.next`` link.
        """
        for quote in response.css("div.quote"):
            yield {
                "author": quote.xpath("span/small/text()").get(),
                "text": quote.css("span.text::text").get(),
            }

        next_page = response.css('li.next a::attr("href")').get()
        # No "next" link means this was the last page; stop paginating.
        if next_page is not None:
            # Re-enter parse() for the linked page so the crawl continues.
            yield response.follow(next_page, self.parse)