

效果展示
import os
import re
from urllib.parse import urlsplit

import requests

翻页查看图片模式
# Matches the original-image URL stored in each "objURL" field of the page
# source; compiled once here instead of on every page fetch.
_OBJ_URL_RE = re.compile(r'"objURL":"(.*?)"')


# Build the access link of every result page.
def get_page():
    """Visit every Baidu image-search result page and process it.

    One "flip" search URL is generated per page by stepping the ``pn`` query
    parameter from 0 up to (but not including) 20000 in increments of 20.
    Each URL is printed and then passed to ``get_img_link``.
    """
    urls = ['http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E7%BE%8E%E5%A5%B3%E5%9B%BE%E7%89%87&pn={}&gsm=3c00000000003c'.format(num) for num in range(0, 20000, 20)]
    for url in urls:
        print(url)
        get_img_link(url)


# Extract the access link of every image from one result page.
def get_img_link(url):
    """Fetch the result page at *url* and download every image it lists.

    The page text is decoded as UTF-8, every ``"objURL":"..."`` value is
    extracted with a regular expression, and each extracted link is passed to
    ``down_img``.

    Args:
        url: Address of a search result page.
    """
    # A timeout keeps one unresponsive server from stalling the whole crawl.
    r = requests.get(url, timeout=10)
    # Force UTF-8 rather than relying on the encoding requests guesses.
    r.encoding = 'utf-8'
    html_code = r.text
    for img in _OBJ_URL_RE.findall(html_code):
        down_img(img)
# Download an image and save it to local disk.
def down_img(url):
    """Download the image at *url* and save it under ``E:/pict_baidu/``.

    The file name is the last segment of the URL path. The target directory
    is created if it does not exist yet.

    Args:
        url: Direct link to the image file.
    """
    # A timeout keeps one unresponsive image host from stalling the crawl.
    web_data = requests.get(url, timeout=10)
    # Take the name from the URL *path* only, so a query string or fragment
    # (e.g. "a.jpg?x=1") does not end up in the file name.
    filename = urlsplit(url).path.split('/')[-1]
    # open() does not create missing directories, so make sure it exists.
    os.makedirs('E:/pict_baidu', exist_ok=True)
    targetfile = 'E:/pict_baidu/{}'.format(filename)
    with open(targetfile, 'wb') as f:
        f.write(web_data.content)


# Start the crawl only when this file is run as a script.
if __name__ == '__main__':
    get_page()
爬虫思路:获取多页访问链接 -> 获取每页图片链接 -> 下载图片;
正则表达式的使用;
format 与 with open ... as 语法的使用;
编码方式的调整;
requests 与 re 模块的使用。
