本文实现用户登录功能
完整爬虫代码如下：
import scrapy
from scrapy.http import FormRequest
from scrapy.utils.response import open_in_browser


class BlogSpider(scrapy.Spider):
    """Log in to quotes.toscrape.com, then scrape quotes from the page after login."""

    name = 'quotes'
    # Scrapy convention is a list here (a set also works but is unordered).
    start_urls = ['http://quotes.toscrape.com/login']

    def parse(self, response):
        """Submit the login form, forwarding the CSRF token embedded in it.

        Assumes the first <input> value inside the form is the CSRF token —
        true for quotes.toscrape.com's login page; confirm for other sites.
        """
        token = response.css('form input::attr(value)').get()
        return FormRequest.from_response(
            response,
            formdata={
                'csrf_token': token,
                # Demo credentials; this site accepts any username/password.
                'username': 'dsadsa',
                'password': 'dsads',
            },
            callback=self.start_scraping,
        )

    def start_scraping(self, response):
        """Yield one item per quote block on the post-login page."""
        for quote in response.css('div.quote'):
            # getall() is the modern equivalent of extract(): list of strings.
            yield {
                '标题': quote.css('span.text::text').getall(),
                '作者': quote.css('.author::text').getall(),
                '标签': quote.css('.tag::text').getall(),
            }
|
补充:
调试scrapy爬虫
在 scrapytutorial/scrapytutorial 目录下新建 run.py 文件：
run.py 代码如下：
from scrapy import cmdline


def main():
    """Launch the 'quotes' spider through Scrapy's CLI entry point.

    Useful for running/debugging the spider from an IDE instead of the shell.
    """
    name = 'quotes'
    # cmdline.execute expects an argv-style list, hence the split().
    cmdline.execute('scrapy crawl {0}'.format(name).split())


# Guard so importing this module does not launch a crawl as a side effect.
if __name__ == '__main__':
    main()
|