# -*- coding: utf-8 -*-
"""Round-trip tests that feed markup through XMLParser and XHTMLParser.

Each test parses a string into a DOM tree (``html5lib.treebuilders.dom``),
serializes the document element back to UTF-8 and compares the result
with an expected string.

NOTE(review): ``XMLParser`` and ``XHTMLParser`` are not defined in this
file; presumably they are provided by the star import from
``html5lib.liberalxmlparser`` -- confirm.

NOTE(review): several test literals below are empty or contain only text.
The markup they presumably held appears to be missing from this copy of
the file -- confirm against the upstream source before relying on them.
"""

import html5lib
from html5lib.treebuilders import dom
from html5lib.liberalxmlparser import *

# Kept after the star import (as in the original ordering) so that nothing
# exported by it can shadow these standard-library module names.
import re
import unittest


def sortattrs(match):
    """Return the matched start tag with its attributes sorted.

    ``match`` is a match object with three groups, as produced by
    ``xmlelem``: the element name, the raw attribute text, and an
    optional trailing ``/``.  Attributes are sorted as ``(name, value)``
    pairs and re-joined with single spaces.  If no ``name="value"`` pair
    can be extracted from group 2, the tag is rebuilt unchanged from the
    three groups.
    """
    name = match.group(1)
    attrs = re.findall(r'([-:\w]+)="([^"]*)"', match.group(2))
    if not attrs:
        return "<%s%s%s>" % match.groups()
    joined = ' '.join(['%s="%s"' % (n, v) for n, v in sorted(attrs)])
    return "<%s %s%s>" % (name, joined, match.group(3))


def ncr(match):
    """Return the UTF-8 encoded character for a decimal character reference.

    Group 1 of ``match`` holds the decimal code point.  ``unichr`` is the
    Python 2 builtin; this module is written for Python 2.
    """
    return unichr(int(match.group(1))).encode('utf-8')


# Matches a start (or self-closing) tag that carries at least one
# double-quoted attribute: group 1 = element name, group 2 = attribute
# text (each attribute preceded by a single space), group 3 = optional "/".
xmlelem = re.compile(r'<(\w+)((?: [-:\w]+="[^"]*")+)(/?)>')


class Xhtml5Test(unittest.TestCase):
    """Base class providing the parse-serialize-compare assertions."""

    def assertXmlEquals(self, input, expected=None, parser=XMLParser):
        """Parse ``input`` and compare its serialization with ``expected``.

        ``input`` is parsed with ``parser(tree=dom.TreeBuilder)`` and the
        document element is serialized with ``toxml('utf-8')``.

        * If ``expected`` is falsy (``None`` or an empty string), the
          expectation is derived from ``input`` itself: attributes are put
          in sorted order and decimal character references are replaced by
          their UTF-8 bytes.  The serialized output gets the same
          attribute sorting before the comparison.
        * Otherwise the serialized output must equal ``expected`` exactly.
        """
        document = parser(tree=dom.TreeBuilder).parse(input).documentElement
        if not expected:
            expected = xmlelem.sub(sortattrs, input)
            expected = re.sub(r'&#(\d+);', ncr, expected)
            output = xmlelem.sub(sortattrs, document.toxml('utf-8'))
            self.assertEqual(expected, output)
        else:
            self.assertEqual(expected, document.toxml('utf-8'))

    def assertXhtmlEquals(self, input, expected=None, parser=XHTMLParser):
        """Same as ``assertXmlEquals`` but defaulting to ``XHTMLParser``."""
        self.assertXmlEquals(input, expected, parser)


class BasicXhtml5Test(Xhtml5Test):
    """XHTMLParser cases with an explicit expected serialization."""

    def test_title_body_mismatched_close(self):
        self.assertXhtmlEquals(
            'Xhtmlcontent',
            ''
            'Xhtml' +
            'content' +
            '')

    def test_title_body_named_charref(self):
        # The expected text contains U+2014 encoded as UTF-8 where the
        # input has "&mdash".
        self.assertXhtmlEquals(
            'mdashA &mdash B',
            ''
            'mdash' +
            'A '+ unichr(0x2014).encode('utf-8') + ' B' +
            '')


class BasicXmlTest(Xhtml5Test):
    """XMLParser cases."""

    def test_comment(self):
        self.assertXmlEquals("")

    def test_cdata(self):
        self.assertXmlEquals("","foo")


class OpmlTest(Xhtml5Test):
    """XMLParser cases named after OPML (mixed-case names, malformed input)."""

    def test_mixedCaseElement(self):
        self.assertXmlEquals(
            '' +
            'Dave Winer' +
            '')

    def test_mixedCaseAttribute(self):
        self.assertXmlEquals(
            '' +
            '' +
            '')

    def test_malformed(self):
        self.assertXmlEquals(
            '' +
            '' +
            '',
            '' +
            '' +
            '',)


class XhtmlTest(Xhtml5Test):
    """XHTMLParser cases whose expectation is derived from the input."""

    def test_mathml(self):
        self.assertXhtmlEquals(""" MathML x = - b ± b 2 - 4 a c 2 a """)

    def test_svg(self):
        self.assertXhtmlEquals(""" SVG """)

    def test_xlink(self):
        self.assertXhtmlEquals(""" XLINK """)

    def test_br(self):
        # The literal deliberately ends with a newline; keep the closing
        # quotes at column 0 so no extra whitespace enters the string.
        self.assertXhtmlEquals(""" XLINK
""")

    def test_strong(self):
        self.assertXhtmlEquals(""" XLINK """)


def buildTestSuite():
    """Return a suite holding every test defined in this module."""
    return unittest.defaultTestLoader.loadTestsFromName(__name__)


def main():
    """Run this module's tests via ``unittest.main()``.

    ``unittest.main()`` loads the tests itself, so the suite is no longer
    built (and thrown away) beforehand.
    """
    unittest.main()


if __name__ == '__main__':
    main()