diff options
Diffstat (limited to '')
-rw-r--r-- | crawler.py | 17 |
1 files changed, 17 insertions, 0 deletions
# crawler.py -- added as a new file by this diff.
import scrapy


class QuotesSpider(scrapy.Spider):
    """Spider that scrapes the "humor" tag listing of quotes.toscrape.com.

    Crawling starts from the single URL in ``start_urls``; every fetched
    page is handed to :meth:`parse`, which emits the quotes found on it
    and schedules the following page when a "next" link is present.
    """

    name = "quotes"
    start_urls = ["https://quotes.toscrape.com/tag/humor"]

    def parse(self, response):
        """Yield one item per quote on the page, then follow pagination.

        Each ``div.quote`` element produces a dict with two keys:

        * ``"author"`` -- first result of the ``span/small/text()`` XPath,
          evaluated relative to the quote element.
        * ``"text"`` -- first result of the ``span.text::text`` CSS query.

        After all quotes are emitted, the ``href`` of the ``li.next a``
        link is looked up. If one is found, a follow-up request for it is
        yielded with this same method as its callback; otherwise the
        generator simply ends.
        """
        for quote_sel in response.css("div.quote"):
            # Extract the fields in the same order as the emitted keys.
            author = quote_sel.xpath("span/small/text()").get()
            text = quote_sel.css("span.text::text").get()
            yield {"author": author, "text": text}

        next_href = response.css('li.next a::attr("href")').get()
        if next_href is None:
            # No "next" link on this page: nothing further to schedule.
            return
        yield response.follow(next_href, callback=self.parse)