
安卓最美应用页面爬虫,爬虫很简单,涉及的东西倒挺多的
文件操作
正则表达式
字符串替换等等
import requests
import re
# Site root; the relative links collected in urlList are prefixed with it
# when each app page is fetched later in the script.
url = "http://zuimeia.com"
# Download the "hot" community app listing page (query platform=2).
r = requests.get('http://zuimeia.com/community/app/hot/?platform=2')
# NOTE(review): the pattern is an empty string here, which looks like the
# original HTML-matching regex was lost; as written it cannot capture any
# link text -- restore the pattern from the original script.
pattern = re.compile(r'')
# Every match of the pattern in the raw response body.
urlList = pattern.findall(r.content)
# Fetch one app page and extract fields from its body with regular expressions
# (title, category, description, download link).
# NOTE(review): this region appears damaged: the bodies are not indented, the
# regex patterns are empty or reduced to '(.*?)', and no return statement for
# requestsUrl is visible. The script's main loop indexes the result as
# [0]=title matches, [1]=category matches, [2]=cleaned description,
# [3]=download-link matches -- confirm against the original script.
def requestsUrl(url):
r = requests.get(url)
# Title: captured text that follows the "app-title" attribute in the page.
title = re.findall(r'"app-title">(.*?)
',r.content)
#print title
# NOTE(review): the category and description patterns below are identical
# as written; the surrounding HTML that told them apart seems to be missing.
category = re.findall(r'(.*?)',r.content)
#print category
describe = re.findall(r'(.*?)',r.content)
#print type(describe[0])
# Only the first description match is cleaned up, via srtReplace.
strdescribe = srtReplace(describe[0])
#print strdescribe
# NOTE(review): from the next line on, the end of requestsUrl and the start of
# srtReplace (its def line and the opening of the listReplace list) appear to
# be fused together; the list entries are empty or broken string literals.
downloadUrl = re.findall(r'', '
', '', '', '', '', '', '', '','','', '
', '
', '
', '', '', '',
'', '','','', '']
# Remainder of srtReplace: substitute every listReplace entry in string, run
# one more replace, and return the result. The literal arguments are split
# across physical lines, so their original contents cannot be determined here.
for eachListReplace in listReplace:
string = string.replace(str(eachListReplace),'
')
string = string.replace('
','')
return string
def categornFinal(category):
    """Join category names into one string, each followed by '-->'.

    Args:
        category: iterable of category names; every item is passed
            through str() before being appended.

    Returns:
        The concatenation of ``str(item) + '-->'`` for each item in order,
        so a non-empty result ends with a trailing '-->'. Returns '' when
        the iterable is empty.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return ''.join(str(eachCategory) + '-->' for eachCategory in category)
def urlReplace(url):
    """Return url with HTML-escaped ampersands ('&amp;') restored to '&'.

    Links taken from raw HTML attributes carry '&amp;' between query
    parameters; the escaped form must be undone before the link is usable
    as a plain URL.

    Args:
        url: link text as it appeared in the HTML source.

    Returns:
        The same text with every '&amp;' replaced by '&'.
    """
    return url.replace('&amp;', '&')
# Fetch one fixed app page up front; the return value is not used.
requestsUrl("http://zuimeia.com/community/app/27369/?platform=2")

# Visit every link collected in urlList and append one text record per app
# to the output file.
for eachUrl in urlList:
    # Links in urlList are relative to the site root held in url.
    eachUrl = url + eachUrl
    content = requestsUrl(eachUrl)
    title = content[0][0]
    category = categornFinal(content[1])
    strdescribe = content[2]
    downloadUrl = urlReplace(content[3][0])
    # Append mode keeps the records written by earlier iterations and runs.
    with open('c:/wqa.txt', 'a+') as fd:
        fd.write(
            'title:' + title + '\n'
            + 'category:' + category + '\n'
            + 'strdescribe:' + strdescribe + '\n'
            + 'downloadUrl:' + downloadUrl
            + '\n-----------------------------------------------------------------------------------------------------------------------------\n'
        )