multithreading - Multi-thread in Python -
this question has answer here:
i'm following book "automate boring tasks python" , i'm trying create progrma downloads multiple comics http://xkcd.com
simultaneously, has ran problems. i'm copying exact same program on book.
here's code:
# multidownloadxkcd.py - downloads xkcd comics using multiple threads. import requests, os ,bs4, threading os.chdir('c:\\users\\patty\\desktop') os.makedirs('xkcd', exist_ok=true) # store comics on ./xkcd def downloadxkcd(startcomic, endcomic): urlnumber in range(startcomic, endcomic): #download page print('downloading page http://xkcd.com/%s...' %(urlnumber)) res = requests.get('http://xkcd.com/%s' % (urlnumber)) res.raise_for_status() soup= bs4.beautifulsoup(res.text, "html.parser") #find url of comic image. comicelem = soup.select('#comic img') if comicelem == []: print('could not find comic image.') else: comicurl = comicelem[0].get('src') #download image. print('downloading image %s...' % (comicurl)) res = requests.get(comicurl, "html.parser") res.raise_for_status() #save image ./xkcd. imagefile = open(os.path.join('xkcd', os.path.basename(comicurl)), 'wb') chunk in res.iter_content(100000): imagefile.write(chunk) imagefile.close() downloadthreads = [] # list of thread objects in range(0,1400, 100): # loops 14 times, creates 14 threads downloadthread = threading.thread(target=downloadxkcd, args=(i, + 99)) downloadthreads.append(downloadthread) downloadthread.start() # wait threads end. downloadthread in downloadthreads: downloadthread.join() print('done.')
i'm getting following exception:
exception in thread thread-1: traceback (most recent call last): file "c:\python\python35\lib\threading.py", line 914, in _bootstrap_inner self.run() file "c:\python\python35\lib\threading.py", line 862, in run self._target(*self._args, **self._kwargs) file "c:\users\patty\pycharmprojects\ch15_tasks\practice.py", line 13, in downloadxkcd res.raise_for_status() file "c:\python\python35\lib\site-packages\requests\models.py", line 862, in raise_for_status raise httperror(http_error_msg, response=self) requests.exceptions.httperror: 404 client error: not found url: http://xkcd.com/0 exception in thread thread-2: traceback (most recent call last): file "c:\python\python35\lib\threading.py", line 914, in _bootstrap_inner self.run() file "c:\python\python35\lib\threading.py", line 862, in run self._target(*self._args, **self._kwargs) file "c:\users\patty\pycharmprojects\ch15_tasks\practice.py", line 25, in downloadxkcd res = requests.get(comicurl, "html.parser") file "c:\python\python35\lib\site-packages\requests\api.py", line 70, in return request('get', url, params=params, **kwargs) file "c:\python\python35\lib\site-packages\requests\api.py", line 56, in request return session.request(method=method, url=url, **kwargs) file "c:\python\python35\lib\site-packages\requests\sessions.py", line 461, in request prep = self.prepare_request(req) file "c:\python\python35\lib\site-packages\requests\sessions.py", line 394, in prepare_request hooks=merge_hooks(request.hooks, self.hooks), file "c:\python\python35\lib\site-packages\requests\models.py", line 294, in prepare self.prepare_url(url, params) file "c:\python\python35\lib\site-packages\requests\models.py", line 354, in prepare_url raise missingschema(error) requests.exceptions.missingschema: invalid url '//imgs.xkcd.com/comics/family_circus.jpg': no schema supplied. perhaps meant http:////imgs.xkcd.com/comics/family_circus.jpg?
it's says url invalid whenever copy paste url webrowser seems valid. know how fix this? thanks
yea , @spectras said , because url fixes url doesn't mean valid. try using "http://www." before , try see if working.
Comments
Post a Comment