login每天学习一点点,每天进步一点点.
当前位置:首页 >> Python Scrapy 连接数据库

Python Scrapy 连接数据库

2018-09-15 14:13:13  |  分类: Python |  标签: 阅读(192)评论(0)
# -*- coding: utf-8 -*-
import pymysql
import scrapy
from quotes.items import QuotesItem


# Module-level MySQL connection, opened at import time and used by
# QuoteSpider.parse to insert each scraped quote.
conn = pymysql.connect(
    host="127.0.0.1",
    port=3306,
    user="root",
    password="root",
    db="test",
    charset='utf8',
)



class QuoteSpider(scrapy.Spider):
    """Crawl quotes.toscrape.com, insert each quote into MySQL, and yield it.

    Every quote found on a page is written to the ``test`` table through the
    module-level ``conn`` connection and also yielded as a ``QuotesItem``.
    Pagination is followed through the ``.pager .next`` link until no such
    link is present.
    """

    name = 'quote'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = ['http://quotes.toscrape.com/']

    def parse(self, response):
        """Extract the quotes on one page, persist them, and follow pagination.

        Args:
            response: The Scrapy response for a quotes listing page.

        Yields:
            One ``QuotesItem`` per quote on the page, followed by a
            ``scrapy.Request`` for the next page when a next link exists.
        """
        for quote in response.css('.quote'):
            text = quote.css('.text::text').extract_first()
            author = quote.css('.author::text').extract_first()
            tags = quote.css('.tags .tag::text').extract()

            # Build a fresh item per quote: reusing one instance would let
            # later iterations overwrite fields of items already yielded.
            item = QuotesItem()
            item['text'] = text
            item['author'] = author
            item['tags'] = tags

            # Re-establish the shared connection if it has dropped.
            conn.ping(reconnect=True)
            # The context manager closes the cursor even if execute() raises.
            with conn.cursor() as cur:
                cur.execute(
                    'insert into test(`add`,`adds`) values(%s,%s)',
                    (text, author),
                )
            conn.commit()

            yield item

        next_href = response.css('.pager .next a::attr(href)').extract_first()
        # The last page has no next link; skip the request rather than
        # re-requesting the current URL.
        if next_href is not None:
            yield scrapy.Request(
                url=response.urljoin(next_href),
                callback=self.parse,
            )

    def closed(self, reason):
        """Close the shared MySQL connection once the spider has finished.

        Args:
            reason: The close reason string supplied by Scrapy.
        """
        # Connection.close() raises if the connection is already closed.
        if conn.open:
            conn.close()


上一篇:ubuntu使用nmap查询端口 下一篇:ThinkPHP5 的视图$view->fetch()和$view->display()的区别

猜你喜欢

发表评论:

0.052547s