diff options
author | terminaldweller <thabogre@gmail.com> | 2022-05-03 17:16:32 +0000 |
---|---|---|
committer | terminaldweller <thabogre@gmail.com> | 2022-05-03 17:16:32 +0000 |
commit | 6ab85750ae2d679a30be59a4ed7e78da52351234 (patch) | |
tree | b99e6315dfb118347acf9441d7c43dd2c30c37ff /crawler.py | |
parent | changed >>> to > for dmenu -D (diff) | |
download | kaminokumo-6ab85750ae2d679a30be59a4ed7e78da52351234.tar.gz kaminokumo-6ab85750ae2d679a30be59a4ed7e78da52351234.zip |
a crawler
Diffstat (limited to '')
-rw-r--r-- | crawler.py | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/crawler.py b/crawler.py
new file mode 100644
index 0000000..d10143d
--- /dev/null
+++ b/crawler.py
@@ -0,0 +1,17 @@
+import scrapy
+
+
+class QuotesSpider(scrapy.Spider):
+    name = "quotes"
+    start_urls = ["https://quotes.toscrape.com/tag/humor"]
+
+    def parse(self, response):
+        for quote in response.css("div.quote"):
+            yield {
+                "author": quote.xpath("span/small/text()").get(),
+                "text": quote.css("span.text::text").get(),
+            }
+
+        next_page = response.css('li.next a::attr("href")').get()
+        if next_page is not None:
+            yield response.follow(next_page, self.parse)