python开发:scrapy爬虫(八)

本文实现用户登录功能

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import scrapy
from scrapy.http import FormRequest
from scrapy.utils.response import open_in_browser


class BlogSpider(scrapy.Spider):
    """Spider that logs into quotes.toscrape.com, then scrapes quotes
    from the page reached after a successful login."""

    name = 'quotes'
    # Scrapy convention is a list here; the original used a set literal,
    # which happens to work (it is only iterated) but is unconventional.
    start_urls = [
        'http://quotes.toscrape.com/login'
    ]

    def parse(self, response):
        """Read the CSRF token from the login form and submit credentials.

        The first <input> of the form on this page is the hidden csrf_token
        field, so extract_first() on its value attribute retrieves the token.
        from_response() reuses the form's action/method and pre-fills its
        fields; the callback runs on the post-login response.
        """
        token = response.css('form input::attr(value)').extract_first()
        return FormRequest.from_response(response, formdata={
            'csrf_token': token,
            'username': 'dsadsa',
            'password': 'dsads'
        }, callback=self.start_scraping)

    def start_scraping(self, response):
        """Yield one item dict per quote block on the logged-in page."""
        # open_in_browser(response)  # uncomment to inspect the response locally
        for quote in response.css('div.quote'):
            yield {
                '标题': quote.css('span.text::text').extract(),
                '作者': quote.css('.author::text').extract(),
                '标签': quote.css('.tag::text').extract(),
            }

补充:
调试scrapy爬虫
在 scrapytutorial/scrapytutorial 目录下新建 run.py 文件

1
2
3
4
5
6
from scrapy import cmdline


# Programmatic entry point: lets the spider be launched (and debugged
# from an IDE) without typing `scrapy crawl quotes` in a shell.
spider_name = 'quotes'
cmdline.execute(['scrapy', 'crawl', spider_name])

坚持原创技术分享,您的支持是我前进的动力!