最新文章专题视频专题问答1问答10问答100问答1000问答2000关键字专题1关键字专题50关键字专题500关键字专题1500TAG最新视频文章推荐1 推荐3 推荐5 推荐7 推荐9 推荐11 推荐13 推荐15 推荐17 推荐19 推荐21 推荐23 推荐25 推荐27 推荐29 推荐31 推荐33 推荐35 推荐37视频文章20视频文章30视频文章40视频文章50视频文章60 视频文章70视频文章80视频文章90视频文章100视频文章120视频文章140 视频2关键字专题关键字专题tag2tag3文章专题文章专题2文章索引1文章索引2文章索引3文章索引4文章索引5123456789101112131415文章专题3
当前位置: 首页 - 科技 - 知识百科 - 正文

使用python解析xml成对应的html示例分享

来源:动视网 责编:小采 时间:2020-11-27 14:30:11
文档

使用python解析xml成对应的html示例分享

使用python解析xml成对应的html示例分享:SAX将dd.xml解析成html。当然啦,如果得到了xml对应的xsl文件可以直接用libxml2将其转换成html。 代码如下:#!/usr/bin/env python # -*- coding: utf-8 -*-#---------------------------------------# 程序:XML解析器
推荐度:
导读使用python解析xml成对应的html示例分享:SAX将dd.xml解析成html。当然啦,如果得到了xml对应的xsl文件可以直接用libxml2将其转换成html。 代码如下:#!/usr/bin/env python # -*- coding: utf-8 -*-#---------------------------------------# 程序:XML解析器


使用SAX将ddt.xml解析成html。当然,如果能得到该xml对应的xsl文件,也可以直接用libxml2将其转换成html。

代码如下:


#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程序:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 语言:Python 2.7
# 功能:将xml解析成对应的html
# 注解:该程序用xml.sax模块的parse函数解析XML,并生成事件
# 继承ContentHandler并重写其事件处理函数
# Dispatcher主要用于相应标签的起始、结束事件的派发
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse

class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    An event for tag ``name`` is routed to ``start<Name>`` or ``end<Name>``,
    where ``<Name>`` is ``name.capitalize()`` (so ``pubDate`` maps to
    ``Pubdate``).  When the instance has no such callable attribute,
    ``defaultStart`` / ``defaultEnd`` is used instead; when that is missing
    too, the event is silently ignored.  Handlers are always invoked with no
    arguments.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Invoke the handler method for one element event.

        prefix -- event kind used as the method-name prefix; the callers in
                  this class pass 'start' or 'end'.
        name   -- element name; capitalised to build the method name.
        attrs  -- element attributes; accepted so callers can pass them, but
                  not forwarded to the handler.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to defaultStart/defaultEnd.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback for an opening tag; forwards to dispatch('start', ...)."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback for a closing tag; forwards to dispatch('end', ...)."""
        self.dispatch('end', name)

class Website(Dispatcher, ContentHandler):

def __init__(self):
self.fout = open('ddt_SAX.html', 'w')
self.imagein = False
self.desflag = False
self.item = False
self.title = ''
self.link = ''
self.guid = ''
self.url = ''
self.pubdate = ''
self.description = ''
self.temp = ''
self.prx = ''
def startChannel(self):

self.fout.write('''\n\n RSS-''')<p> def endChannel(self):<BR> self.fout.write('''<BR> <tr><td height="20"></td></tr><BR> </table><BR> </center><BR> <BR> function GetTimeDiff(str)<BR> {<BR> if(str == '')<BR> {<BR> return '';<BR> }</P> <P> var pubDate = new Date(str);<BR> var nowDate = new Date();<BR> var diffMilSeconds = nowDate.valueOf()-pubDate.valueOf();<BR> var days = diffMilSeconds/86400000;<BR> days = parseInt(days);</P> <P> diffMilSeconds = diffMilSeconds-(days*86400000);<BR> var hours = diffMilSeconds/3600000;<BR> hours = parseInt(hours);</P> <P> diffMilSeconds = diffMilSeconds-(hours*3600000);<BR> var minutes = diffMilSeconds/60000;<BR> minutes = parseInt(minutes);</P> <P> diffMilSeconds = diffMilSeconds-(minutes*60000);<BR> var seconds = diffMilSeconds/1000;<BR> seconds = parseInt(seconds);<p> var returnStr = "±±¾©·¢²¼Ê±¼ä£º" + pubDate.toLocaleString();</P> <P> if(days > 0)<BR> {<BR> returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + days + "Ìì" + hours + "Сʱ" + minutes + "·ÖÖÓ£©";<BR> }<BR> else if (hours > 0)<BR> {<BR> returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + hours + "Сʱ" + minutes + "·ÖÖÓ£©";<BR> }<BR> else if (minutes > 0)<BR> {<BR> returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + minutes + "·ÖÖÓ£©";<BR> }</P> <P> return returnStr;</P> <P> }</P> <P> function GetSpanText()<BR> {<BR> var pubDate;<BR> var pubDateArray;<BR> var spanArray = document.getElementsByTagName("span");</P> <P> for(var i = 0; i < spanArray.length; i++)<BR> {<BR> pubDate = spanArray[i].innerHTML;<BR> document.getElementsByTagName("span")[i].innerHTML = GetTimeDiff(pubDate); <BR> }<BR> }</P> <P> GetSpanText();<BR> </ script><BR> </body><BR> </html><BR> ''')<BR> self.fout.close()</P> <P> def characters(self, chars):<BR> if chars.strip():<BR> #chars = chars.strip()<BR> self.temp += chars<BR> #print self.temp<p> <BR> def startTitle(self):<p> if self.item:<BR> self.fout.write('''<BR> <tr bgcolor="#eeeeee">\n<td style="padding-top:5px;padding-left:5px;" height="30">\n<B><BR> ''')<p> def endTitle(self):<p> if 
not self.imagein and not self.item:<BR> self.title = self.temp<BR> self.temp = ''<BR> self.fout.write(self.title.encode('gb2312'))<p> #self.title = self.temp<BR> self.fout.write('''<BR> \n\n\n

\n
\n

function copyLink()
{
clipboardData.setData("Text",window.location.href);
alert("RSSÁ´½ÓÒѾ­¸´ÖƵ½¼ôÌù°å");
}

function subscibeLink()
{
var str = window.location.pathname;
while(str.match(/^\//))
{
str = str.replace(/^\//,"");
}
window.open("http://rss.sina.com.cn/my_sina_web_rss_news.html?url=" + str,"_self");

}
\n

\n
\n







\n
''')

if self.item:
self.title = self.temp
self.temp = ''
self.fout.write(self.title.encode('gb2312'))
self.fout.write('''


''')

def startImage(self):
self.imagein = True

def endImage(self):
self.imagein = False

def startLink(self):
if self.imagein:
self.fout.write('''

def endLink(self):
self.link = self.temp
self.temp = ''
if self.imagein:
self.fout.write(self.link.encode('gb2312'))
self.fout.write('''" target="_blank">\n ''')
elif self.item:
#self.link = self.temp
pass
else:
self.fout.write(self.link)
self.fout.write(''' " target="
_blank
"> ''')
self.fout.write(self.title.encode('gb2312'))
self.fout.write('''


''')
self.fout.write(self.description.encode('gb2312'))
self.fout.write('''
¸´ÖÆ´ËÒ³Á´½Ó ÎÒҪǶÈë¸ÃÐÂÎÅÁÐ±íµ½ÎÒµÄÒ³Ãæ£¨¼òµ¥¡¢¿ìËÙ¡¢ÊµÊ±¡¢Ãâ·Ñ£©


''')

def startUrl(self):
if self.imagein:
self.fout.write('''\n










''')

# Script entry point: run the SAX parser over ddt.xml with a Website handler.
if __name__ == '__main__':
    handler = Website()
    parse('ddt.xml', handler)

文档

使用python解析xml成对应的html示例分享

使用python解析xml成对应的html示例分享:SAX将dd.xml解析成html。当然啦,如果得到了xml对应的xsl文件可以直接用libxml2将其转换成html。 代码如下:#!/usr/bin/env python # -*- coding: utf-8 -*-#---------------------------------------# 程序:XML解析器
推荐度:
标签: 分享 解析 html
  • 热门焦点

最新推荐

猜你喜欢

热门推荐

Top
''')
if self.item:
#self.url = self.temp
pass

def defaultStart(self):
pass
def defaultEnd(self):
self.temp = ''
def startDescription(self):
pass
def endDescription(self):
self.description = self.temp
self.temp = ''
if self.item:
#self.fout.write('¡¡¡¡')
self.fout.write(self.description.encode('gb2312'))

def endGuid(self):
self.guid = self.temp
def endPubdate(self):
if not self.temp.startswith('http'):
self.pubdate = self.temp
self.temp = ''
else:
self.pubdate = ''
def startItem(self):
self.item = True
def endItem(self):
self.item = False
self.fout.write('''


self.fout.write(self.link)
self.fout.write(''' " target="_blank"> ''')
self.fout.write(self.guid)
self.fout.write('''

''')
self.fout.write(self.pubdate)
self.fout.write('''