Python, web scraping: nested loop not working
The nested loop over the variable `j` is not working. The debugger skips over it, even though the variables it needs appear to be initialized beforehand.
# Script as posted in the question, restored to runnable formatting.
# It prints the text of every 'centeralign' table cell from the world report
# and then tries to print every table cell of the matching /country/ page.
# The logic errors being asked about are intentionally left in place and
# marked with NOTE comments.
from urllib.request import Request, urlopen

# Install beautifulsoup4 with: pip install beautifulsoup4
import bs4
import pdb
import sys
import json

site = "http://bgp.he.net/report/world"
hdr = {'user-agent': 'mozilla/5.0'}

req = Request(site, headers=hdr)
page = urlopen(req)
soup = bs4.BeautifulSoup(page, 'html.parser')

for t in soup.find_all('td', class_='centeralign'):
    s = str(t.string)
    # str(None) is "None", so this skips cells whose .string is None.
    if s != "None":
        print(s.strip())
        site2 = "http://bgp.he.net/country/" + s.strip()
        # NOTE: this request is built but never opened with urlopen().
        req = Request(site2, headers=hdr)
        # NOTE: `page` is the world-report response that was already parsed
        # above, not the country page, so the inner loop finds no cells.
        soup2 = bs4.BeautifulSoup(page, 'html.parser')
        for j in soup2.find_all('td'):
            s2 = str(j.string)
            # NOTE: strip() is called on the tag `j` rather than on `s2`.
            print(j.strip())
# Corrected script: prints the text of every 'centeralign' table cell from
# the world report, then fetches the matching /country/ page for each one and
# prints the text of every table cell on it.
from urllib.request import Request, urlopen

# Install beautifulsoup4 with: pip install beautifulsoup4
import bs4
import pdb
import sys
import json

site = "http://bgp.he.net/report/world"
hdr = {'user-agent': 'mozilla/5.0'}

req = Request(site, headers=hdr)
# BeautifulSoup reads the response while it is open; `with` then closes it.
with urlopen(req) as page:
    soup = bs4.BeautifulSoup(page, 'html.parser')

for t in soup.find_all('td', class_='centeralign'):
    s = str(t.string)
    # str(None) is "None", so this skips cells whose .string is None.
    if s != "None":
        print(s.strip())
        site2 = "http://bgp.he.net/country/" + s.strip()
        # The question missed these two steps: build a *new* request for the
        # country page and actually open it, instead of re-parsing `page`.
        req2 = Request(site2, headers=hdr)
        with urlopen(req2) as page2:
            soup2 = bs4.BeautifulSoup(page2, 'html.parser')
        for j in soup2.find_all('td'):
            s2 = str(j.text)
            # The question called strip() on the tag `j`; strip the text.
            print(s2.strip())
Comments
Post a Comment