python - Scrapy multi-layer crawl: append data to a list and yield the item -
The website structure I'm trying to parse using Scrapy is the following:
I'd like the extracted data to have this format:
[{ "project":{"projectname":"project1"}, "samples":["sample1's_content","sample2's_content","sample3's_content"] }, { "project":{"projectname":"project2"}, "samples":["sample1's_content","sample2's_content","sample3's_content"] }]
I tried this:
import scrapy

from item import item


class spider(scrapy.Spider):
    """Crawl main page -> project pages -> sample-list page -> sample pages.

    One item is created per project in ``parseproject`` and handed down the
    request chain through ``meta`` so that every sample page can append its
    content to the same ``samples`` list.

    NOTE: this version only collects the data; no callback ever yields the
    item, which is the open question of this post.
    """

    name = "spider"

    def start_requests(self):
        """Request the main page, which lists the projects."""
        url = "the main page's url"
        yield scrapy.Request(url=url, callback=self.parseprojectlist)

    def parseprojectlist(self, response):
        """Schedule one request per project found on the main page."""
        # `selector(project_list)` is a placeholder for the real link extraction.
        for url in selector(project_list):
            yield scrapy.Request(url=url, callback=self.parseproject)

    def parseproject(self, response):
        """Create the project's item and request its sample-list page."""
        # scrape data (placeholder: presumably `projectname` and
        # `samplelistpage` are extracted from the response here)
        myitem = item()
        myitem['samples'] = []
        myitem['project'] = {'projectname': projectname}  # further fields elided
        yield scrapy.Request(
            url=samplelistpage,
            callback=self.parsesamplelistpage,
            meta={'myitem': myitem},
        )

    def parsesamplelistpage(self, response):
        """Schedule one request per sample, forwarding the project's item."""
        # Forward the item object itself; the literal string 'myitem' would
        # leave parsesample with nothing to append to.
        myitem = response.meta['myitem']
        for url in selector(sample_list):
            yield scrapy.Request(
                url=url,
                callback=self.parsesample,
                meta={'myitem': myitem},
            )

    def parsesample(self, response):
        """Append this sample's content to the shared project item."""
        # parse sample data (placeholder: `sample_data` is extracted here)
        response.meta['myitem']['samples'].append(sample_data)
I tried to put `yield response.meta['myitem']`
in parsesamplelistpage:
def parsesamplelistpage(self, response):
    """First attempt: yield the item right after scheduling the sample requests.

    Yields one request per sample page and then the project item itself.
    """
    myitem = response.meta['myitem']
    for url in selector(sample_list):
        yield scrapy.Request(
            url=url,
            callback=self.parsesample,
            meta={'myitem': myitem},
        )
    # This runs as soon as the requests above are scheduled, i.e. before any
    # parsesample callback has appended to it, so "samples" is still empty.
    yield myitem
and to put `yield response.meta['myitem']`
in parsesample:
def parsesample(self, response):
    """Second attempt: append the sample, then yield the shared project item."""
    # parse sample data (placeholder: `sample_data` is extracted here)
    myitem = response.meta['myitem']
    myitem['samples'].append(sample_data)
    # This callback runs once per sample page, so the same item is emitted
    # after every append: N samples produce N progressively longer copies.
    yield myitem
Both solutions failed.
The first one yields empty "samples" fields. The second one creates multiple entries for the same project, like this:
[ { "project": { "projectname": "project2" }, "samples": [ "sample1's_content" ] }, { "project": { "projectname": "project2" }, "samples": [ "sample1's_content", "sample2's_content" ] }, { "project": { "projectname": "project2" }, "samples": [ "sample1's_content", "sample2's_content", "sample3's_content" ] } ]
I wonder whether there is a way to deal with this problem?
Comments
Post a Comment