python - I want to use scrapy just download .jpg picture -
i have question want down load .jpg image. used follow code found still download .png or .gif picture. can me ?
#coding:utf-8 scrapy.spiders import spider scrapy.selector import selector jianshu.items import jianshuitem import scrapy scrapy.crawler import crawlerprocess class jiansider(spider): name = "jiantu" allowed_domains = [] start_urls= [ "https://tieba.baidu.com/p/5227563995" ] def parse(self, response): sel = selector(response) sites = sel.xpath('//div/img/@src').extract() item = jianshuitem() item['image_url'] = response.xpath('//div/img/@src').extract() url in item['image_url']: list_photo = url.split('.') photo_type = list_photo[len(list_photo)-1] print photo_type if photo_type != 'jpg': #print url item['image_url'].remove(url) #print "delete1" yield item total_page = response.xpath('//span[@class="red"]/text()').extract() now_page = response.xpath('//li/span[@class="tp"]/text()').extract() tpa=total_page[len(total_page)-1] npa=now_page[len(now_page)-1] tpage= int(tpa) npage= int (npa) print "present page -----" print npage print "total page ------" print tpage starturls = 'https://tieba.baidu.com/p/5227563995?pn=' if npage != tpage: npage = npage+1 new_url = '%s%s'%(starturls,npage) print "new_url ------------" print new_url if new_url: yield scrapy.request(new_url,callback = self.parse)
i use if page!= .jpg avoid download photo type isn't jpg failed. can tell me why , me deal ?
Comments
Post a Comment