web scraping - Python -- AttributeError: 'NoneType' object has no attribute 'find_all' -
i'm trying image links , description pages in python keep getting attributeerror:
traceback (most recent call last): file "d:\downloads\test.py", line 78, in <module> get_detail_book_list( get_book_list() ) file "d:\downloads\test.py", line 59, in get_detail_book_list image = div.find_all('img') attributeerror: 'nonetype' object has no attribute 'find_all'
here's code:
from bs4 import beautifulsoup selenium import webdriver class book(): """docstring book""" def __init__(self): self.title = "" self.link = "" self.image = "" #add new items class self.des = "" def get_book_list(): driver = webdriver.phantomjs(executable_path = r'd:\programs\phantomjs\bin\phantomjs.exe') url = 'https://www.amazon.com/s/ref=nb_sb_noss_1?url=search-alias%3daps&field-keywords=python+programming' driver.get(url) soup = beautifulsoup(driver.page_source, 'lxml') ul = soup.find('ul', {'id':'s-results-list-atf'}) # container whole book list book_list = [] li in ul.find_all('li', class_ = 's-result-item celwidget'): # each block product all_a = li.find_all('a') #find links product detail page # print all_a[1].text # print all_a[1]['href'] new_book = book() new_book.title = all_a[1].text new_book.link = all_a[1]['href'] book_list.append(new_book) driver.quit() return book_list def get_detail_book_list(book_list): driver = webdriver.phantomjs(executable_path = r'd:\programs\phantomjs\bin\phantomjs.exe') b in book_list[0:2]: #for every book (b) in book list / can try printing first 2 instead of test e.g: #for b in book_list[0:2]: #url = 'https://www.amazon.com/python-programming-introduction-computer-science/dp/1590282418/ref=sr_1_1?ie=utf8&qid=1473731166&sr=8-1&keywords=python+programming' url = b.link driver.get(url) soup = beautifulsoup(driver.page_source,'lxml') div = soup.find('div', class_ = 'rscontainer') image = div.find_all('img') links in img: print links['src'] b.image = image # not sure if need @ because same data printed without description = soup.find('p', class_ = 'read-more-text') print description.text b.des = description print '\n' driver.quit() get_detail_book_list( get_book_list() )
Comments
Post a Comment