# Differences from the current specification (23 December 2006) are as follows: # * Phases and insertion modes are one concept in parser.py. # * EOF handling is slightly different to make sure , and # always exist. # * We also deal with content when there's no DOCTYPE. # It is expected that the specification will catch up with us in due course ;-) # # It should be trivial to add the following cases. However, we should probably # also look into comment handling and such then... # * A

element end tag creates an empty

element when there's no

# element in scope. # * A
element end tag creates an empty
element. try: frozenset except NameError: # Import from the sets module for python 2.3 from sets import Set as set from sets import ImmutableSet as frozenset import gettext _ = gettext.gettext import tokenizer import treebuilders from treebuilders._base import Marker from treebuilders import simpletree import utils from constants import contentModelFlags, spaceCharacters, asciiUpper2Lower from constants import scopingElements, formattingElements, specialElements from constants import headingElements, tableInsertModeElements, voidElements class HTMLParser(object): """HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML""" def __init__(self, strict = False, tree=simpletree.TreeBuilder): """ strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be returned. This class is almost always a subclass of html5lib.treebuilders._base.TreeBuilder """ # Raise an exception on the first error encountered self.strict = strict self.tree = tree() self.errors = [] self.phases = { "initial": InitialPhase(self, self.tree), "rootElement": RootElementPhase(self, self.tree), "beforeHead": BeforeHeadPhase(self, self.tree), "inHead": InHeadPhase(self, self.tree), "afterHead": AfterHeadPhase(self, self.tree), "inBody": InBodyPhase(self, self.tree), "inTable": InTablePhase(self, self.tree), "inCaption": InCaptionPhase(self, self.tree), "inColumnGroup": InColumnGroupPhase(self, self.tree), "inTableBody": InTableBodyPhase(self, self.tree), "inRow": InRowPhase(self, self.tree), "inCell": InCellPhase(self, self.tree), "inSelect": InSelectPhase(self, self.tree), "afterBody": AfterBodyPhase(self, self.tree), "inFrameset": InFramesetPhase(self, self.tree), "afterFrameset": AfterFramesetPhase(self, self.tree), "trailingEnd": TrailingEndPhase(self, self.tree) } def parse(self, stream, encoding=None, innerHTML=False): """Parse a HTML document into a well-formed tree stream - a filelike object or string containing the HTML to be parsed innerHTML - Are we parsing in innerHTML mode (note innerHTML=True is not yet supported) The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) """ self.tree.reset() self.firstStartTag = False self.errors = [] self.phase = self.phases["initial"] # We only seem to have InBodyPhase testcases where the following is # relevant ... need others too self.lastPhase = None # We don't actually support innerHTML yet but this should allow # assertations self.innerHTML = innerHTML self.tokenizer = tokenizer.HTMLTokenizer(stream, encoding) # XXX This is temporary for the moment so there isn't any other # changes needed for the parser to work with the iterable tokenizer for token in self.tokenizer: token = self.normalizeToken(token) type = token["type"] method = getattr(self.phase, "process%s" % type, None) if type in ("Characters", "SpaceCharacters", "Comment"): method(token["data"]) elif type in ("StartTag", "Doctype"): method(token["name"], token["data"]) elif type == "EndTag": method(token["name"]) else: self.parseError(token["data"]) # When the loop finishes it's EOF self.phase.processEOF() return self.tree.getDocument() def parseError(self, data="XXX ERROR MESSAGE NEEDED"): # XXX The idea is to make data mandatory. self.errors.append((self.tokenizer.stream.position(), data)) if self.strict: raise ParseError def atheistParseError(self): """This error is not an error""" pass def normalizeToken(self, token): """ HTML5 specific normalizations to the token stream """ if token["type"] == "EmptyTag": # When a solidus (/) is encountered within a tag name what happens # depends on whether the current tag name matches that of a void # element. If it matches a void element atheists did the wrong # thing and if it doesn't it's wrong for everyone. if token["name"] in voidElements: self.atheistParseError() else: self.parseError(_("Solidus (/) incorrectly placed in tag.")) token["type"] = "StartTag" if token["type"] == "StartTag": token["name"] = token["name"].translate(asciiUpper2Lower) # We need to remove the duplicate attributes and convert attributes # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"} # AT When Python 2.4 is widespread we should use # dict(reversed(token.data)) if token["data"]: token["data"] = dict([(attr.translate(asciiUpper2Lower), value) for attr,value in token["data"][::-1]]) else: token["data"] = {} elif token["type"] == "EndTag": if token["data"]: self.parseError(_("End tag contains unexpected attributes.")) token["name"] = token["name"].lower() return token def resetInsertionMode(self): # The name of this method is mostly historical. (It's also used in the # specification.) last = False newModes = { "select":"inSelect", "td":"inCell", "th":"inCell", "tr":"inRow", "tbody":"inTableBody", "thead":"inTableBody", "tfoot":"inTableBody", "caption":"inCaption", "colgroup":"inColumnGroup", "table":"inTable", "head":"inBody", "body":"inBody", "frameset":"inFrameset" } for node in self.tree.openElements[::-1]: if node == self.tree.openElements[0]: last = True if node.name not in ['td', 'th']: # XXX assert self.innerHTML raise NotImplementedError # Check for conditions that should only happen in the innerHTML # case if node.name in ("select", "colgroup", "head", "frameset"): # XXX assert self.innerHTML if node.name in newModes: self.phase = self.phases[newModes[node.name]] break elif node.name == "html": if self.tree.headPointer is None: self.phase = self.phases["beforeHead"] else: self.phase = self.phases["afterHead"] break elif last: self.phase = self.phases["body"] break class Phase(object): """Base class for helper object that implements each phase of processing """ # Order should be (they can be omitted): # * EOF # * Comment # * Doctype # * SpaceCharacters # * Characters # * StartTag # - startTag* methods # * EndTag # - endTag* methods def __init__(self, parser, tree): self.parser = parser self.tree = tree def processEOF(self): self.tree.generateImpliedEndTags() if len(self.tree.openElements) > 2: self.parser.parseError(_("Unexpected end of file. " u"Missing closing tags.")) elif len(self.tree.openElements) == 2 and\ self.tree.openElements[1].name != "body": # This happens for framesets or something? self.parser.parseError(_("Unexpected end of file. Expected end " u"tag (" + self.tree.openElements[1].name + u") first.")) elif self.parser.innerHTML and len(self.tree.openElements) > 1 : # XXX This is not what the specification says. Not sure what to do # here. self.parser.parseError(_("XXX innerHTML EOF")) # Betting ends. def processComment(self, data): # For most phases the following is correct. Where it's not it will be # overridden. self.tree.insertComment(data, self.tree.openElements[-1]) def processDoctype(self, name, error): self.parser.parseError(_("Unexpected DOCTYPE. Ignored.")) def processSpaceCharacters(self, data): self.tree.insertText(data) def processStartTag(self, name, attributes): self.startTagHandler[name](name, attributes) def startTagHtml(self, name, attributes): if self.parser.firstStartTag == False and name == "html": self.parser.parseError(_("html needs to be the first start tag.")) # XXX Need a check here to see if the first start tag token emitted is # this token... If it's not, invoke self.parser.parseError(). for attr, value in attributes.iteritems(): if attr not in self.tree.openElements[0].attributes: self.tree.openElements[0].attributes[attr] = value self.parser.firstStartTag = False def processEndTag(self, name): self.endTagHandler[name](name) class InitialPhase(Phase): # This phase deals with error handling as well which is currently not # covered in the specification. The error handling is typically known as # "quirks mode". It is expected that a future version of HTML5 will defin # this. def processEOF(self): self.parser.parseError(_(u"Unexpected End of file. Expected DOCTYPE.")) self.parser.phase = self.parser.phases["rootElement"] self.parser.phase.processEOF() def processComment(self, data): self.tree.insertComment(data, self.tree.document) def processDoctype(self, name, error): if error: self.parser.parseError(_("Erroneous DOCTYPE.")) self.tree.insertDoctype(name) self.parser.phase = self.parser.phases["rootElement"] def processSpaceCharacters(self, data): self.tree.insertText(data, self.tree.document) def processCharacters(self, data): self.parser.parseError(_(u"Unexpected non-space characters. " u"Expected DOCTYPE.")) self.parser.phase = self.parser.phases["rootElement"] self.parser.phase.processCharacters(data) def processStartTag(self, name, attributes): self.parser.parseError(_(u"Unexpected start tag (" + name +\ u"). Expected DOCTYPE.")) self.parser.phase = self.parser.phases["rootElement"] self.parser.phase.processStartTag(name, attributes) def processEndTag(self, name): self.parser.parseError(_(u"Unexpected end tag (" + name +\ "). Expected DOCTYPE.")) self.parser.phase = self.parser.phases["rootElement"] self.parser.phase.processEndTag(name) class RootElementPhase(Phase): # helper methods def insertHtmlElement(self): element = self.tree.createElement("html", {}) self.tree.openElements.append(element) self.tree.document.appendChild(element) self.parser.phase = self.parser.phases["beforeHead"] # other def processEOF(self): self.insertHtmlElement() self.parser.phase.processEOF() def processComment(self, data): self.tree.insertComment(data, self.tree.document) def processSpaceCharacters(self, data): self.tree.insertText(data, self.tree.document) def processCharacters(self, data): self.insertHtmlElement() self.parser.phase.processCharacters(data) def processStartTag(self, name, attributes): if name == "html": self.parser.firstStartTag = True self.insertHtmlElement() self.parser.phase.processStartTag(name, attributes) def processEndTag(self, name): self.insertHtmlElement() self.parser.phase.processEndTag(name) class BeforeHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = utils.MethodDispatcher([ ("html", self.startTagHtml), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = utils.MethodDispatcher([ ("html", self.endTagHtml) ]) self.endTagHandler.default = self.endTagOther def processEOF(self): self.startTagHead("head", {}) self.parser.phase.processEOF() def processCharacters(self, data): self.startTagHead("head", {}) self.parser.phase.processCharacters(data) def startTagHead(self, name, attributes): self.tree.insertElement(name, attributes) self.tree.headPointer = self.tree.openElements[-1] self.parser.phase = self.parser.phases["inHead"] def startTagOther(self, name, attributes): self.startTagHead("head", {}) self.parser.phase.processStartTag(name, attributes) def endTagHtml(self, name): self.startTagHead("head", {}) self.parser.phase.processEndTag(name) def endTagOther(self, name): self.parser.parseError(_("Unexpected end tag (" + name +\ ") after the (implied) root element.")) class InHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = utils.MethodDispatcher([ ("html", self.startTagHtml), ("title", self.startTagTitle), ("style", self.startTagStyle), ("script", self.startTagScript), (("base", "link", "meta"), self.startTagBaseLinkMeta), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther self. endTagHandler = utils.MethodDispatcher([ ("head", self.endTagHead), ("html", self.endTagHtml), (("title", "style", "script"), self.endTagTitleStyleScript) ]) self.endTagHandler.default = self.endTagOther # helper def appendToHead(self, element): if self.tree.headPointer is not None: self.tree.headPointer.appendChild(element) else: assert self.parser.innerHTML self.tree.openElements[-1].appendChild(element) # the real thing def processEOF(self): if self.tree.openElements[-1].name in ("title", "style", "script"): self.parser.parseError(_(u"Unexpected end of file. " u"Expected end tag (" + self.tree.openElements[-1].name + ").")) self.tree.openElements.pop() self.anythingElse() self.parser.phase.processEOF() def processCharacters(self, data): if self.tree.openElements[-1].name in ("title", "style", "script"): self.tree.insertText(data) else: self.anythingElse() self.parser.phase.processCharacters(data) def startTagHead(self, name, attributes): self.tree.insertElement(name, attributes) self.tree.headPointer = self.tree.openElements[-1] self.parser.phase = self.parser.phases["inHead"] def startTagTitle(self, name, attributes): element = self.tree.createElement(name, attributes) self.appendToHead(element) self.tree.openElements.append(element) self.parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"] def startTagStyle(self, name, attributes): element = self.tree.createElement(name, attributes) if self.tree.headPointer is not None and\ self.parser.phase == self.parser.phases["inHead"]: self.appendToHead(element) else: self.tree.openElements[-1].appendChild(element) self.tree.openElements.append(element) self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"] def startTagScript(self, name, attributes): element = self.tree.createElement(name, attributes) element._flags.append("parser-inserted") if self.tree.headPointer is not None and\ self.parser.phase == self.parser.phases["inHead"]: self.appendToHead(element) else: self.tree.openElements[-1].appendChild(element) self.tree.openElements.append(element) self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"] def startTagBaseLinkMeta(self, name, attributes): element = self.tree.createElement(name, attributes) self.appendToHead(element) def startTagOther(self, name, attributes): self.anythingElse() self.parser.phase.processStartTag(name, attributes) def endTagHead(self, name): if self.tree.openElements[-1].name == "head": self.tree.openElements.pop() else: self.parser.parseError(_(u"Unexpected end tag (head). Ignored.")) self.parser.phase = self.parser.phases["afterHead"] def endTagHtml(self, name): self.anythingElse() self.parser.phase.processEndTag(name) def endTagTitleStyleScript(self, name): if self.tree.openElements[-1].name == name: self.tree.openElements.pop() else: self.parser.parseError(_(u"Unexpected end tag (" + name +\ "). Ignored.")) def endTagOther(self, name): self.parser.parseError(_(u"Unexpected end tag (" + name +\ "). Ignored.")) def anythingElse(self): if self.tree.openElements[-1].name == "head": self.endTagHead("head") else: self.parser.phase = self.parser.phases["afterHead"] class AfterHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = utils.MethodDispatcher([ ("html", self.startTagHtml), ("body", self.startTagBody), ("frameset", self.startTagFrameset), (("base", "link", "meta", "script", "style", "title"), self.startTagFromHead) ]) self.startTagHandler.default = self.startTagOther def processEOF(self): self.anythingElse() self.parser.phase.processEOF() def processCharacters(self, data): self.anythingElse() self.parser.phase.processCharacters(data) def startTagBody(self, name, attributes): self.tree.insertElement(name, attributes) self.parser.phase = self.parser.phases["inBody"] def startTagFrameset(self, name, attributes): self.tree.insertElement(name, attributes) self.parser.phase = self.parser.phases["inFrameset"] def startTagFromHead(self, name, attributes): self.parser.parseError(_(u"Unexpected start tag (" + name +\ ") that can be in head. Moved.")) self.parser.phase = self.parser.phases["inHead"] self.parser.phase.processStartTag(name, attributes) def startTagOther(self, name, attributes): self.anythingElse() self.parser.phase.processStartTag(name, attributes) def processEndTag(self, name): self.anythingElse() self.parser.phase.processEndTag(name) def anythingElse(self): self.tree.insertElement("body", {}) self.parser.phase = self.parser.phases["inBody"] class InBodyPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-body # the crazy mode def __init__(self, parser, tree): Phase.__init__(self, parser, tree) #Keep a ref to this for special handling of whitespace in

        self.processSpaceCharactersNonPre = self.processSpaceCharacters

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("script", "style"), self.startTagScriptStyle),
            (("base", "link", "meta", "title"),
              self.startTagFromHead),
            ("body", self.startTagBody),
            (("address", "blockquote", "center", "dir", "div", "dl",
              "fieldset", "listing", "menu", "ol", "p", "pre", "ul"),
              self.startTagCloseP),
            ("form", self.startTagForm),
            (("li", "dd", "dt"), self.startTagListItem),
            ("plaintext",self.startTagPlaintext),
            (headingElements, self.startTagHeading),
            ("a", self.startTagA),
            (("b", "big", "em", "font", "i", "nobr", "s", "small", "strike",
              "strong", "tt", "u"),self.startTagFormatting),
            ("button", self.startTagButton),
            (("marquee", "object"), self.startTagMarqueeObject),
            ("xmp", self.startTagXmp),
            ("table", self.startTagTable),
            (("area", "basefont", "bgsound", "br", "embed", "img", "param",
              "spacer", "wbr"), self.startTagVoidFormatting),
            ("hr", self.startTagHr),
            ("image", self.startTagImage),
            ("input", self.startTagInput),
            ("isindex", self.startTagIsIndex),
            ("textarea", self.startTagTextarea),
            (("iframe", "noembed", "noframes", "noscript"), self.startTagCdata),
            ("select", self.startTagSelect),
            (("caption", "col", "colgroup", "frame", "frameset", "head",
              "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
              "tr"), self.startTagMisplaced),
            (("event-source", "section", "nav", "article", "aside", "header",
              "footer", "datagrid", "command"), self.startTagNew)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("p",self.endTagP),
            ("body",self.endTagBody),
            ("html",self.endTagHtml),
            (("address", "blockquote", "center", "div", "dl", "fieldset",
              "listing", "menu", "ol", "pre", "ul"), self.endTagBlock),
            ("form", self.endTagForm),
            (("dd", "dt", "li"), self.endTagListItem),
            (headingElements, self.endTagHeading),
            (("a", "b", "big", "em", "font", "i", "nobr", "s", "small",
              "strike", "strong", "tt", "u"), self.endTagFormatting),
            (("marquee", "object", "button"), self.endTagButtonMarqueeObject),
            (("head", "frameset", "select", "optgroup", "option", "table",
              "caption", "colgroup", "col", "thead", "tfoot", "tbody", "tr",
              "td", "th"), self.endTagMisplaced),
            (("area", "basefont", "bgsound", "br", "embed", "hr", "image",
              "img", "input", "isindex", "param", "spacer", "wbr", "frame"),
              self.endTagNone),
            (("noframes", "noscript", "noembed", "textarea", "xmp", "iframe"),
              self.endTagCdataTextAreaXmp),
            (("event-source", "section", "nav", "article", "aside", "header",
              "footer", "datagrid", "command"), self.endTagNew)
            ])
        self.endTagHandler.default = self.endTagOther

    # helper
    def addFormattingElement(self, name, attributes):
        self.tree.insertElement(name, attributes)
        self.tree.activeFormattingElements.append(
            self.tree.openElements[-1])

    # the real deal
    def processSpaceCharactersPre(self, data):
        #Sometimes (start of  blocks) we want to drop leading newlines
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
        if (data.startswith("\n") and self.tree.openElements[-1].name == "pre" 
            and not self.tree.openElements[-1].hasContent()):
            data = data[1:]
        if data:
            self.tree.insertText(data)

    def processCharacters(self, data):
        # XXX The specification says to do this for every character at the
        # moment, but apparently that doesn't match the real world so we don't
        # do it for space characters.
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(data)

    def startTagScriptStyle(self, name, attributes):
        self.parser.phases["inHead"].processStartTag(name, attributes)

    def startTagFromHead(self, name, attributes):
        self.parser.parseError(_(u"Unexpected start tag (" + name +\
          ") that belongs in the head. Moved."))
        self.parser.phases["inHead"].processStartTag(name, attributes)

    def startTagBody(self, name, attributes):
        self.parser.parseError(_(u"Unexpected start tag (body)."))
        if len(self.tree.openElements) == 1 \
          or self.tree.openElements[1].name != "body":
            assert self.parser.innerHTML
        else:
            for attr, value in attributes.iteritems():
                if attr not in self.tree.openElements[1].attributes:
                    self.tree.openElements[1].attributes[attr] = value

    def startTagCloseP(self, name, attributes):
        if self.tree.elementInScope("p"):
            self.endTagP("p")
        self.tree.insertElement(name, attributes)
        if name == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersPre

    def startTagForm(self, name, attributes):
        if self.tree.formPointer:
            self.parser.parseError("Unexpected start tag (form). Ignored.")
        else:
            if self.tree.elementInScope("p"):
                self.endTagP("p")
            self.tree.insertElement(name, attributes)
            self.tree.formPointer = self.tree.openElements[-1]

    def startTagListItem(self, name, attributes):
        if self.tree.elementInScope("p"):
            self.endTagP("p")
        stopNames = {"li":("li"), "dd":("dd", "dt"), "dt":("dd", "dt")}
        stopName = stopNames[name]
        # AT Use reversed in Python 2.4...
        for i, node in enumerate(self.tree.openElements[::-1]):
            if node.name in stopName:
                for j in range(i+1):
                    self.tree.openElements.pop()
                break

            # Phrasing elements are all non special, non scoping, non
            # formatting elements
            if (node.name in (specialElements | scopingElements)
              and node.name not in ("address", "div")):
                break
        # Always insert an  element.
        self.tree.insertElement(name, attributes)

    def startTagPlaintext(self, name, attributes):
        if self.tree.elementInScope("p"):
            self.endTagP("p")
        self.tree.insertElement(name, attributes)
        self.parser.tokenizer.contentModelFlag = contentModelFlags["PLAINTEXT"]

    def startTagHeading(self, name, attributes):
        if self.tree.elementInScope("p"):
            self.endTagP("p")
        for item in headingElements:
            if self.tree.elementInScope(item):
                self.parser.parseError(_("Unexpected start tag (" + name +\
                  ")."))
                item = self.tree.openElements.pop()
                while item.name not in headingElements:
                    item = self.tree.openElements.pop()
                break
        self.tree.insertElement(name, attributes)

    def startTagA(self, name, attributes):
        afeAElement = self.tree.elementInActiveFormattingElements("a")
        if afeAElement:
            self.parser.parseError(_(u"Unexpected start tag (a) implies "
              "end tag (a)."))
            self.endTagFormatting("a")
            if afeAElement in self.tree.openElements:
                self.tree.openElements.remove(afeAElement)
            if afeAElement in self.tree.activeFormattingElements:
                self.tree.activeFormattingElements.remove(afeAElement)
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(name, attributes)

    def startTagFormatting(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.addFormattingElement(name, attributes)

    def startTagButton(self, name, attributes):
        if self.tree.elementInScope("button"):
            self.parser.parseError(_("Unexpected start tag (button) implied "
              "end tag (button)."))
            self.processEndTag("button")
            self.parser.phase.processStartTag(name, attributes)
        else:
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(name, attributes)
            self.tree.activeFormattingElements.append(Marker)

    def startTagMarqueeObject(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(name, attributes)
        self.tree.activeFormattingElements.append(Marker)

    def startTagXmp(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(name, attributes)
        self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]

    def startTagTable(self, name, attributes):
        if self.tree.elementInScope("p"):
            self.processEndTag("p")
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inTable"]

    def startTagVoidFormatting(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(name, attributes)
        self.tree.openElements.pop()

    def startTagHr(self, name, attributes):
        if self.tree.elementInScope("p"):
            self.endTagP("p")
        self.tree.insertElement(name, attributes)
        self.tree.openElements.pop()

    def startTagImage(self, name, attributes):
        # No really...
        self.parser.parseError(_(u"Unexpected start tag (image). Treated "
          u"as img."))
        self.processStartTag("img", attributes)

    def startTagInput(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(name, attributes)
        if self.tree.formPointer:
            # XXX Not exactly sure what to do here
            self.tree.openElements[-1].form = self.tree.formPointer
        self.tree.openElements.pop()

    def startTagIsIndex(self, name, attributes):
        self.parser.parseError("Unexpected start tag isindex. Don't use it!")
        if self.tree.formPointer:
            return
        self.processStartTag("form", {})
        self.processStartTag("hr", {})
        self.processStartTag("p", {})
        self.processStartTag("label", {})
        # XXX Localization ...
        self.processCharacters(
            "This is a searchable index. Insert your search keywords here:")
        attributes["name"] = "isindex"
        attrs = [[key,value] for key,value in attributes.iteritems()]
        self.processStartTag("input", dict(attrs))
        self.processEndTag("label")
        self.processEndTag("p")
        self.processStartTag("hr", {})
        self.processEndTag("form")

    def startTagTextarea(self, name, attributes):
        # XXX Form element pointer checking here as well...
        self.tree.insertElement(name, attributes)
        self.parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]

    def startTagCdata(self, name, attributes):
        """iframe, noembed noframes, noscript(if scripting enabled)"""
        self.tree.insertElement(name, attributes)
        self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]

    def startTagSelect(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inSelect"]

    def startTagMisplaced(self, name, attributes):
        """ Elements that should be children of other elements that have a
        different insertion mode; here they are ignored
        "caption", "col", "colgroup", "frame", "frameset", "head",
        "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
        "tr", "noscript"
        """
        self.parser.parseError(_(u"Unexpected start tag (" + name +\
          u"). Ignored."))

    def startTagNew(self, name, other):
        """New HTML5 elements, "event-source", "section", "nav",
        "article", "aside", "header", "footer", "datagrid", "command"
        """
        raise NotImplementedError

    def startTagOther(self, name, attributes):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(name, attributes)

    def endTagP(self, name):
        if self.tree.elementInScope("p"):
            self.tree.generateImpliedEndTags("p")
        if self.tree.openElements[-1].name != "p":
            self.parser.parseError("Unexpected end tag (p).")
        while self.tree.elementInScope("p"):
            self.tree.openElements.pop()

    def endTagBody(self, name):
        # XXX Need to take open  tags into account here. We shouldn't imply
        #  but we should not throw a parse error either. Specification is
        # likely to be updated.
        if self.tree.openElements[1].name != "body":
            # innerHTML case
            self.parser.parseError()
            return
        if self.tree.openElements[-1].name != "body":
            self.parser.parseError(_("Unexpected end tag (body). Missing "
              u"end tag (" + self.tree.openElements[-1].name + ")."))
        self.parser.phase = self.parser.phases["afterBody"]

    def endTagHtml(self, name):
        self.endTagBody(name)
        if not self.parser.innerHTML:
            self.parser.phase.processEndTag(name)

    def endTagBlock(self, name):
        #Put us back in the right whitespace handling mode
        if name == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
        inScope = self.tree.elementInScope(name)
        if inScope:
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != name:
             self.parser.parseError((u"End tag (" + name + ") seen too "
               u"early. Expected other end tag."))
        if inScope:
            node = self.tree.openElements.pop()
            while node.name != name:
                node = self.tree.openElements.pop()

    def endTagForm(self, name):
        self.endTagBlock(name)
        self.tree.formPointer = None

    def endTagListItem(self, name):
        # AT Could merge this with the Block case
        if self.tree.elementInScope(name):
            self.tree.generateImpliedEndTags(name)
            if self.tree.openElements[-1].name != name:
                self.parser.parseError((u"End tag (" + name + ") seen too "
                  u"early. Expected other end tag."))

        if self.tree.elementInScope(name):
            node = self.tree.openElements.pop()
            while node.name != name:
                node = self.tree.openElements.pop()

    def endTagHeading(self, name):
        for item in headingElements:
            if self.tree.elementInScope(item):
                self.tree.generateImpliedEndTags()
                break
        if self.tree.openElements[-1].name != name:
            self.parser.parseError((u"Unexpected end tag (" + name + "). "
                  u"Expected other end tag."))

        for item in headingElements:
            if self.tree.elementInScope(item):
                item = self.tree.openElements.pop()
                while item.name not in headingElements:
                    item = self.tree.openElements.pop()
                break

    def endTagFormatting(self, name):
        """The much-feared adoption agency algorithm
        """
        # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
        # XXX Better parseError messages appreciated.
        while True:
            # Step 1 paragraph 1
            afeElement = self.tree.elementInActiveFormattingElements(name)
            if not afeElement or (afeElement in self.tree.openElements and
              not self.tree.elementInScope(afeElement.name)):
                self.parser.parseError(_(u"End tag (" + name + ") violates "
                  u" step 1, paragraph 1 of the adoption agency algorithm."))
                return

            # Step 1 paragraph 2
            elif afeElement not in self.tree.openElements:
                self.parser.parseError(_(u"End tag (" + name + ") violates "
                  u" step 1, paragraph 2 of the adoption agency algorithm."))
                self.tree.activeFormattingElements.remove(afeElement)
                return

            # Step 1 paragraph 3
            if afeElement != self.tree.openElements[-1]:
                self.parser.parseError(_(u"End tag (" + name + ") violates "
                  u" step 1, paragraph 3 of the adoption agency algorithm."))

            # Step 2
            # Start of the adoption agency algorithm proper
            afeIndex = self.tree.openElements.index(afeElement)
            furthestBlock = None
            for element in self.tree.openElements[afeIndex:]:
                if element.name in specialElements | scopingElements:
                    furthestBlock = element
                    break

            # Step 3
            if furthestBlock is None:
                element = self.tree.openElements.pop()
                while element != afeElement:
                    element = self.tree.openElements.pop()
                self.tree.activeFormattingElements.remove(element)
                return
            commonAncestor = self.tree.openElements[afeIndex-1]

            # Step 5
            if furthestBlock.parent:
                furthestBlock.parent.removeChild(furthestBlock)

            # Step 6
            # The bookmark is supposed to help us identify where to reinsert
            # nodes in step 12. We have to ensure that we reinsert nodes after
            # the node before the active formatting element. Note the bookmark
            # can move in step 7.4
            bookmark = self.tree.activeFormattingElements.index(afeElement)

            # Step 7
            lastNode = node = furthestBlock
            while True:
                # AT replace this with a function and recursion?
                # Node is element before node in open elements
                node = self.tree.openElements[
                    self.tree.openElements.index(node)-1]
                while node not in self.tree.activeFormattingElements:
                    tmpNode = node
                    node = self.tree.openElements[
                        self.tree.openElements.index(node)-1]
                    self.tree.openElements.remove(tmpNode)
                # Step 7.3
                if node == afeElement:
                    break
                # Step 7.4
                if lastNode == furthestBlock:
                    # XXX should this be index(node) or index(node)+1
                    # Anne: I think +1 is ok. Given x = [2,3,4,5]
                    # x.index(3) gives 1 and then x[1 +1] gives 4...
                    bookmark = self.tree.activeFormattingElements.\
                      index(node) + 1
                # Step 7.5
                cite = node.parent
                if node.hasContent():
                    clone = node.cloneNode()
                    # Replace node with clone
                    self.tree.activeFormattingElements[
                      self.tree.activeFormattingElements.index(node)] = clone
                    self.tree.openElements[
                      self.tree.openElements.index(node)] = clone
                    node = clone
                # Step 7.6
                # Remove lastNode from its parents, if any
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)
                node.appendChild(lastNode)
                # Step 7.7
                lastNode = node
                # End of inner loop

            # Step 8
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)
            commonAncestor.appendChild(lastNode)

            # Step 9
            clone = afeElement.cloneNode()

            # Step 10
            furthestBlock.reparentChildren(clone)

            # Step 11
            furthestBlock.appendChild(clone)

            # Step 12
            self.tree.activeFormattingElements.remove(afeElement)
            self.tree.activeFormattingElements.insert(bookmark, clone)

            # Step 13
            self.tree.openElements.remove(afeElement)
            self.tree.openElements.insert(
              self.tree.openElements.index(furthestBlock) + 1, clone)

    def endTagButtonMarqueeObject(self, name):
        if self.tree.elementInScope(name):
            self.tree.generateImpliedEndTags()
        if self.tree.openElements[-1].name != name:
            self.parser.parseError(_(u"Unexpected end tag (" + name +\
              "). Expected other end tag first."))

        if self.tree.elementInScope(name):
            element = self.tree.openElements.pop()
            while element.name != name:
                element = self.tree.openElements.pop()
            self.tree.clearActiveFormattingElements()

    def endTagMisplaced(self, name):
        # This handles elements with end tags in other insertion modes.
        self.parser.parseError(_(u"Unexpected end tag (" + name +\
          u"). Ignored."))

    def endTagNone(self, name):
        # This handles elements with no end tag.
        self.parser.parseError(_(u"This tag (" + name + u") has no end tag"))

    def endTagCdataTextAreaXmp(self, name):
        if self.tree.openElements[-1].name == name:
            self.tree.openElements.pop()
        else:
            self.parser.parseError(_("Unexpected end tag (" + name +\
              "). Ignored."))

    def endTagNew(self, name):
        """New HTML5 elements, "event-source", "section", "nav",
        "article", "aside", "header", "footer", "datagrid", "command"
        """
        raise NotImplementedError

    def endTagOther(self, name):
        # XXX This logic should be moved into the treebuilder
        # AT should use reversed instead of [::-1] when Python 2.4 == True.
        for node in self.tree.openElements[::-1]:
            if node.name == name:
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != name:
                    self.parser.parseError(_("Unexpected end tag (" + name +\
                      ")."))
                while self.tree.openElements.pop() != node:
                    pass
                break
            else:
                if node.name in specialElements | scopingElements:
                    self.parser.parseError(_(u"Unexpected end tag (" + name +\
                      "). Ignored."))
                    break

class InTablePhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("caption", self.startTagCaption),
            ("colgroup", self.startTagColgroup),
            ("col", self.startTagCol),
            (("tbody", "tfoot", "thead"), self.startTagRowGroup),
            (("td", "th", "tr"), self.startTagImplyTbody),
            ("table", self.startTagTable)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
              "tfoot", "th", "thead", "tr"), self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper methods
    def clearStackToTableContext(self):
        # "clear the stack back to a table context"
        while self.tree.openElements[-1].name not in ("table", "html"):
            self.parser.parseError(_(u"Unexpected implied end tag (" +\
              self.tree.openElements[-1].name + u") in the table phase."))
            self.tree.openElements.pop()
        # When the current node is  it's an innerHTML case

    # processing methods
    def processCharacters(self, data):
        self.parser.parseError(_(u"Unexpected non-space characters in "
          u"table context caused voodoo mode."))
        # Make all the special element rearranging voodoo kick in
        self.tree.insertFromTable = True
        # Process the character in the "in body" mode
        self.parser.phases["inBody"].processCharacters(data)
        self.tree.insertFromTable = False

    def startTagCaption(self, name, attributes):
        self.clearStackToTableContext()
        self.tree.activeFormattingElements.append(Marker)
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, name, attributes):
        self.clearStackToTableContext()
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inColumnGroup"]

    def startTagCol(self, name, attributes):
        self.startTagColgroup("colgroup", {})
        self.parser.phase.processStartTag(name, attributes)

    def startTagRowGroup(self, name, attributes):
        self.clearStackToTableContext()
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inTableBody"]

    def startTagImplyTbody(self, name, attributes):
        self.startTagRowGroup("tbody", {})
        self.parser.phase.processStartTag(name, attributes)

    def startTagTable(self, name, attributes):
        self.parser.parseError(_(u"Unexpected start tag (table) in table "
          u"phase. Implies end tag (table)."))
        self.parser.phase.processEndTag("table")
        if not self.parser.innerHTML:
            self.parser.phase.processStartTag(name, attributes)

    def startTagOther(self, name, attributes):
        self.parser.parseError(_(u"Unexpected start tag (" + name + u") in "
          u"table context caused voodoo mode."))
        # Make all the special element rearranging voodoo kick in
        self.tree.insertFromTable = True
        # Process the start tag in the "in body" mode
        self.parser.phases["inBody"].processStartTag(name, attributes)
        self.tree.insertFromTable = False

    def endTagTable(self, name):
        if self.tree.elementInScope("table", True):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "table":
                self.parser.parseError(_(u"Unexpected end tag (table). "
                  u"Expected end tag (" + self.tree.openElements[-1].name +\
                  u")."))
            while self.tree.openElements[-1].name != "table":
                self.tree.openElements.pop()
            self.tree.openElements.pop()
            self.parser.resetInsertionMode()
        else:
            # innerHTML case
            self.parser.parseError()

    def endTagIgnore(self, name):
        self.parser.parseError(_("Unexpected end tag (" + name +\
          "). Ignored."))

    def endTagOther(self, name):
        self.parser.parseError(_(u"Unexpected end tag (" + name + u") in "
          u"table context caused voodoo mode."))
        # Make all the special element rearranging voodoo kick in
        self.parser.insertFromTable = True
        # Process the end tag in the "in body" mode
        self.parser.phases["inBody"].processEndTag(name)
        self.parser.insertFromTable = False


class InCaptionPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableElement)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("caption", self.endTagCaption),
            ("table", self.endTagTable),
            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    def processCharacters(self, data):
        self.parser.phases["inBody"].processCharacters(data)

    def startTagTableElement(self, name, attributes):
        self.parser.parseError()
        self.parser.phase.processEndTag("caption")
        # XXX how do we know the tag is _always_ ignored in the innerHTML
        # case and therefore shouldn't be processed again? I'm not sure this
        # strategy makes sense...
        if not self.parser.innerHTML:
            self.parser.phase.processStartTag(name, attributes)

    def startTagOther(self, name, attributes):
        self.parser.phases["inBody"].processStartTag(name, attributes)

    def endTagCaption(self, name):
        if self.tree.elementInScope(name, True):
            # AT this code is quite similar to endTagTable in "InTable"
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "caption":
                self.parser.parseError(_(u"Unexpected end tag (caption). "
                  u"Missing end tags."))
            while self.tree.openElements[-1].name != "caption":
                self.tree.openElements.pop()
            self.tree.openElements.pop()
            self.tree.clearActiveFormattingElements()
            self.parser.phase = self.parser.phases["inTable"]
        else:
            # innerHTML case
            self.parser.parseError()

    def endTagTable(self, name):
        self.parser.parseError()
        self.parser.phase.processEndTag("caption")
        # XXX ...
        if not self.parser.innerHTML:
            self.parser.phase.processStartTag(name, attributes)

    def endTagIgnore(self, name):
        self.parser.parseError(_("Unexpected end tag (" + name +\
          "). Ignored."))

    def endTagOther(self, name):
        self.parser.phases["inBody"].processEndTag(name)


class InColumnGroupPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("col", self.startTagCol)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("colgroup", self.endTagColgroup),
            ("col", self.endTagCol)
        ])
        self.endTagHandler.default = self.endTagOther

    def processCharacters(self, data):
        self.endTagColgroup("colgroup")
        # XXX
        if not self.parser.innerHTML:
            self.parser.phase.processCharacters(data)

    def startTagCol(self, name ,attributes):
        self.tree.insertElement(name, attributes)
        self.tree.openElements.pop()

    def startTagOther(self, name, attributes):
        self.endTagColgroup("colgroup")
        # XXX how can be sure it's always ignored?
        if not self.parser.innerHTML:
            self.parser.phase.processStartTag(name, attributes)

    def endTagColgroup(self, name):
        if self.tree.openElements[-1].name == "html":
            # innerHTML case
            self.parser.parseError()
        else:
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]

    def endTagCol(self, name):
        self.parser.parseError(_(u"Unexpected end tag (col). "
          u"col has no end tag."))

    def endTagOther(self, name):
        self.endTagColgroup("colgroup")
        # XXX how can be sure it's always ignored?
        if not self.parser.innerHTML:
            self.parser.phase.processEndTag(name)


class InTableBodyPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("tr", self.startTagTr),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"), self.startTagTableOther)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "td", "th",
              "tr"), self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper methods
    def clearStackToTableBodyContext(self):
        while self.tree.openElements[-1].name not in ("tbody", "tfoot",
          "thead", "html"):
            self.parser.parseError(_(u"Unexpected implied end tag (" +\
              self.tree.openElements[-1].name + u") in the table body phase."))
            self.tree.openElements.pop()

    # the rest
    def processCharacters(self,data):
        self.parser.phases["inTable"].processCharacters(data)

    def startTagTr(self, name, attributes):
        self.clearStackToTableBodyContext()
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, name, attributes):
        self.parser.parseError(_(u"Unexpected table cell start tag (" +\
          name + u") in the table body phase."))
        self.startTagTr("tr", {})
        self.parser.phase.processStartTag(name, attributes)

    def startTagTableOther(self, name, attributes):
        # XXX AT Any ideas on how to share this with endTagTable?
        if self.tree.elementInScope("tbody", True) or \
          self.tree.elementInScope("thead", True) or \
          self.tree.elementInScope("tfoot", True):
            self.clearStackToTableBodyContext()
            self.endTagTableRowGroup(self.tree.openElements[-1].name)
            self.parser.phase.processStartTag(name, attributes)
        else:
            # innerHTML case
            self.parser.parseError()

    def startTagOther(self, name, attributes):
        self.parser.phases["inTable"].processStartTag(name, attributes)

    def endTagTableRowGroup(self, name):
        if self.tree.elementInScope(name, True):
            self.clearStackToTableBodyContext()
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
        else:
            self.parser.parseError(_("Unexpected end tag (" + name +\
              ") in the table body phase. Ignored."))

    def endTagTable(self, name):
        if self.tree.elementInScope("tbody", True) or \
          self.tree.elementInScope("thead", True) or \
          self.tree.elementInScope("tfoot", True):
            self.clearStackToTableBodyContext()
            self.endTagTableRowGroup(self.tree.openElements[-1].name)
            self.parser.phase.processEndTag(name)
        else:
            # innerHTML case
            self.parser.parseError()

    def endTagIgnore(self, name):
        self.parser.parseError(_("Unexpected end tag (" + name +\
          ") in the table body phase. Ignored."))

    def endTagOther(self, name):
        self.parser.phases["inTable"].processEndTag(name)


class InRowPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
              "tr"), self.startTagTableOther)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("tr", self.endTagTr),
            ("table", self.endTagTable),
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            (("body", "caption", "col", "colgroup", "html", "td", "th"),
              self.endTagIgnore)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        while self.tree.openElements[-1].name not in ("tr", "html"):
            self.parser.parseError(_(u"Unexpected implied end tag (" +\
              self.tree.openElements[-1].name + u") in the row phase."))
            self.tree.openElements.pop()

    # the rest
    def processCharacters(self, data):
        self.parser.phases["inTable"].processCharacters(data)

    def startTagTableCell(self, name, attributes):
        self.clearStackToTableRowContext()
        self.tree.insertElement(name, attributes)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, name, attributes):
        self.endTagTr("tr")
        # XXX how are we sure it's always ignored in the innerHTML case?
        if not self.parser.innerHTML:
            self.parser.phase.processStartTag(name, attributes)

    def startTagOther(self, name, attributes):
        self.parser.phases["inTable"].processStartTag(name, attributes)

    def endTagTr(self, name):
        if self.tree.elementInScope("tr", True):
            self.clearStackToTableRowContext()
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTableBody"]
        else:
            # innerHTML case
            self.parser.parseError()

    def endTagTable(self, name):
        self.endTagTr("tr")
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        if not self.parser.innerHTML:
            self.parser.phase.processEndTag(name)

    def endTagTableRowGroup(self, name):
        if self.tree.elementInScope(name, True):
            self.endTagTr("tr")
            self.parser.phase.processEndTag(name)
        else:
            # innerHTML case
            self.parser.parseError()

    def endTagIgnore(self, name):
        self.parser.parseError(_("Unexpected end tag (" + name +\
          u") in the row phase. Ignored."))

    def endTagOther(self, name):
        self.parser.phases["inTable"].processEndTag(name)

class InCellPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableOther)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            (("td", "th"), self.endTagTableCell),
            (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
            (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper
    def closeCell(self):
        if self.tree.elementInScope("td", True):
            self.endTagTableCell("td")
        elif self.tree.elementInScope("th", True):
            self.endTagTableCell("th")

    # the rest
    def processCharacters(self, data):
        self.parser.phases["inBody"].processCharacters(data)

    def startTagTableOther(self, name, attributes):
        if self.tree.elementInScope("td", True) or \
          self.tree.elementInScope("th", True):
            self.closeCell()
            self.parser.phase.processStartTag(name, attributes)
        else:
            # innerHTML case
            self.parser.parseError()

    def startTagOther(self, name, attributes):
        self.parser.phases["inBody"].processStartTag(name, attributes)
        # Optimize this for subsequent invocations. Can't do this initially
        # because self.phases doesn't really exist at that point.
        self.startTagHandler.default =\
          self.parser.phases["inBody"].processStartTag

    def endTagTableCell(self, name):
        if self.tree.elementInScope(name, True):
            self.tree.generateImpliedEndTags(name)
            if self.tree.openElements[-1].name != name:
                self.parser.parseError("Got table cell end tag (" + name +\
                  ") while required end tags are missing.")
                while True:
                    node = self.tree.openElements.pop()
                    if node.name == name:
                        break
            else:
                self.tree.openElements.pop()
            self.tree.clearActiveFormattingElements()
            self.parser.phase = self.parser.phases["inRow"]
        else:
            self.parser.parseError(_("Unexpected end tag (" + name +\
              "). Ignored."))

    def endTagIgnore(self, name):
        self.parser.parseError(_("Unexpected end tag (" + name +\
          "). Ignored."))

    def endTagImply(self, name):
        if self.tree.elementInScope(name, True):
            self.closeCell()
            self.parser.phase.processEndTag(name)
        else:
            # sometimes innerHTML case
            self.parser.parseError()

    def endTagOther(self, name):
        self.parser.phases["inBody"].processEndTag(name)
        # Optimize this for subsequent invocations. Can't do this initially
        # because self.phases doesn't really exist at that point.
        self.endTagHandler.default = self.parser.phases["inBody"].processEndTag


class InSelectPhase(Phase):
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("option", self.startTagOption),
            ("optgroup", self.startTagOptgroup),
            ("select", self.startTagSelect)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("option", self.endTagOption),
            ("optgroup", self.endTagOptgroup),
            ("select", self.endTagSelect),
            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td",
              "th"), self.endTagTableElements)
        ])
        self.endTagHandler.default = self.endTagOther

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
    def processCharacters(self, data):
        self.tree.insertText(data)

    def startTagOption(self, name, attributes):
        # We need to imply  if