web scraping - Python -- AttributeError: 'NoneType' object has no attribute 'find_all' -


i'm trying image links , description pages in python keep getting attributeerror:

traceback (most recent call last):   file "d:\downloads\test.py", line 78, in <module>     get_detail_book_list( get_book_list() )   file "d:\downloads\test.py", line 59, in get_detail_book_list     image = div.find_all('img') attributeerror: 'nonetype' object has no attribute 'find_all' 

here's code:

from bs4 import beautifulsoup selenium import webdriver  class book():     """docstring book"""     def __init__(self):         self.title = ""         self.link = ""         self.image = "" #add new items class         self.des = ""   def get_book_list():          driver = webdriver.phantomjs(executable_path = r'd:\programs\phantomjs\bin\phantomjs.exe')      url = 'https://www.amazon.com/s/ref=nb_sb_noss_1?url=search-alias%3daps&field-keywords=python+programming'      driver.get(url)      soup = beautifulsoup(driver.page_source, 'lxml')      ul = soup.find('ul', {'id':'s-results-list-atf'}) # container whole book list      book_list = []      li in ul.find_all('li', class_ = 's-result-item celwidget'):   # each block product          all_a = li.find_all('a') #find links product detail page         # print all_a[1].text         # print all_a[1]['href']          new_book = book()         new_book.title = all_a[1].text         new_book.link = all_a[1]['href']         book_list.append(new_book)       driver.quit()      return book_list  def get_detail_book_list(book_list):      driver = webdriver.phantomjs(executable_path = r'd:\programs\phantomjs\bin\phantomjs.exe')      b in book_list[0:2]:    #for every book (b) in book list / can try printing first 2 instead of test e.g:                                  #for b in book_list[0:2]:           #url = 'https://www.amazon.com/python-programming-introduction-computer-science/dp/1590282418/ref=sr_1_1?ie=utf8&qid=1473731166&sr=8-1&keywords=python+programming'         url = b.link          driver.get(url)          soup = beautifulsoup(driver.page_source,'lxml')          div = soup.find('div', class_ = 'rscontainer')          image = div.find_all('img')          links in img:              print links['src']          b.image = image # not sure if need @ because same data printed without          description = soup.find('p', class_ =  'read-more-text')          print description.text          b.des = description          print '\n'        driver.quit()  get_detail_book_list( get_book_list() ) 


Comments

Popular posts from this blog

java - SSE Emitter : Manage timeouts and complete() -

jquery - uncaught exception: DataTables Editor - remote hosting of code not allowed -

java - How to resolve error - package com.squareup.okhttp3 doesn't exist? -