Twisted3#
Question#
Parse the HTML of a file in Twisted.
Solution#
# -*- coding: utf8 -*-
import io as StringIO
from twisted.internet import reactor
from twisted.web.client import getPage
from twisted.python.util import println
from lxml import etree
def parseHtml(html):
    """Parse an HTML document into an lxml tree.

    Args:
        html: The document content, either as bytes or as text. Text is
            encoded to UTF-8 first, matching the parser's encoding.

    Returns:
        The tree object produced by ``etree.parse``.
    """
    # The stream must be binary: io.StringIO only accepts text, so a bytes
    # page body would raise TypeError before the parser ever ran.
    if not isinstance(html, bytes):
        html = html.encode('utf8')
    parser = etree.HTMLParser(encoding='utf8')
    # `StringIO` is this file's alias for the `io` module.
    return etree.parse(StringIO.BytesIO(html), parser)
def extractTitle(tree):
    """Return *tree* unchanged.

    Title extraction is currently disabled, so this callback simply passes
    its argument through to the next step.

    Args:
        tree: The value handed over by the previous callback.

    Returns:
        The same object that was passed in.
    """
    # Disabled title lookup, kept for reference:
    # titleText = unicode(tree.xpath("//title/text()")[0])
    # return titleText
    return tree
# Fetch the page, run it through the parsing callbacks, and print the
# outcome (a result or a failure).
# The URL is a bytes literal: on Python 3 getPage expects bytes, and on
# Python 2 a bytes literal is the same as a plain str.
d = getPage(b'http://www.uthcode.com')
d.addCallback(parseHtml)
d.addCallback(extractTitle)
d.addBoth(println)
# Shut the reactor down once the outcome has been printed; without this
# reactor.run() never returns and the script hangs.
d.addBoth(lambda _: reactor.stop())
reactor.run()