import codecs
import xml.parsers.expat
import unittest
class Node:
    """Node: A class representing a node in an XML DOM tree.

    This is a lightweight XML representation -- I didn't want to use
    the full, foot-crushing power of xml.dom (from the standard Python
    library). Each Node represents an XML tag (or tree of tags). There
    are no separate objects for documents, attributes, data, etc.
    It's just tags, with stuff in them.

    Node(name, attrs=None, namespace=None, parent=None, children=None,
        data=None) -- constructor.

    *name* and *namespace* are what they say; these must be strings
    (or unicode). If *attrs* (a dict) is provided, the new node is
    given those attributes. If the *parent* is provided, the new node
    is appended to its contents. If the *children* (a list) are
    provided, they are appended to the new node's contents. If the
    *data* is provided (a string or unicode object, or something
    convertible to unicode), it is appended as well.

    A Node's namespace is assumed to be inherited from its parent, if
    you provide none. (This means that the value of nod.getnamespace()
    can change after you add it as a child to some other Node.)

    (You can also provide a namespace by putting an 'xmlns' key in
    *attrs*. This is equivalent to providing a *namespace*. The dict
    you pass in is never modified.)

    A Node can only have one parent. If you try to add a Node somewhere
    as a child, and it already has a parent, you will get a ValueError.
    (If you call the node's remove() method, it will become parentless,
    and you can then give it a new parent.)

    Don't do crazy stuff like setting a Node to be its own grandpa.

    Class methods:

    escapetext() -- escape <, >, ", and & symbols in a string.
    parse() -- parse a string into a Node tree.
    parsefile() -- parse a file into a Node tree.

    Public methods:

    delete() -- destroy a Node and its children.
    copy() -- return a (deep) copy of a Node.
    getname() -- return the Node's tag name.
    getnamespace() -- return the Node's namespace.
    setnamespace() -- set (or clear) the Node's namespace.
    getcontents() -- return the Node's list of Nodes and data.
    addchild() -- add a Node as a child of this Node.
    removechild() -- remove a Node as a child of this Node.
    remove() -- remove this Node from its parent, if any.
    getparent() -- return the Node's parent.
    setparent() -- add this Node as a child of some other Node.
    getchild() -- return a child of the Node, according to criteria.
    getchildren() -- return some children of the Node, according to criteria.
    getdata() -- return all the data in the Node.
    setdata() -- change the data in the Node.
    adddata() -- append data to the Node.
    cleardata() -- clear all data from the Node.
    getattrs() -- return all attributes in the Node.
    setattrs() -- set several attributes in the Node.
    clearattrs() -- clear all attributes from the Node.
    getattr() -- return one attribute in the Node.
    getattrdef() -- return one attribute in the Node.
    hasattr() -- test if the Node has a given attribute.
    setattr() -- set an attribute in the Node.
    clearattr() -- clear an attribute from the Node.
    setchild() -- find or create a child, according to criteria.
    getchilddata() -- return the data from one child of the Node.
    setchilddata() -- find or create a child, according to criteria, and
        set its data.
    addchilddata() -- find or create a child, according to criteria, and
        append to its data.
    getchildattr() -- return an attribute from one child of the Node.
    getchildattrdef() -- return an attribute from one child of the Node.
    setchildattr() -- find or create a child, according to criteria, and
        set an attribute in it.
    serialize() -- create a string representing the Node as XML.
    """

    # Text type used when coercing data and attribute values: ``unicode``
    # on Python 2, ``str`` on interpreters where ``unicode`` is undefined.
    try:
        _text_type = unicode
    except NameError:
        _text_type = str
    # Types that are stored as-is, without coercion to text.
    _string_types = (str, _text_type)

    def __init__(self, name, attrs=None, namespace=None, parent=None,
                 children=None, data=None):
        if attrs and 'xmlns' in attrs:
            # Work on a copy so the caller's dict is left untouched.
            attrs = dict(attrs)
            ns = attrs.pop('xmlns')
            if not namespace:
                namespace = ns
        if not name:
            raise ValueError('Node must have a name')

        self.name = name
        self.namespace = namespace
        self.attrs = {}
        self.contents = []
        self.parent = None

        if parent:
            if not isinstance(parent, Node):
                raise TypeError('parent must be a Node')
            parent.addchild(self)
        if attrs:
            for key, val in attrs.items():
                self.setattr(key, val)
        self.adddata(data)
        if children:
            # A single Node is accepted as shorthand for a one-item list.
            if isinstance(children, Node):
                children = [children]
            for nod in children:
                self.addchild(nod)

    def __str__(self):
        return str(self.serialize())

    def __unicode__(self):
        return self._text_type(self.serialize())

    def copy(self):
        """copy() -> Node

        Return a (deep) copy of a Node.

        The copy will not have a parent, even if this Node does. (But it
        will copy an inherited namespace as an explicit one.)
        """
        nod = Node(self.getname(), self.getattrs(), self.getnamespace())
        for val in self.getcontents():
            if isinstance(val, Node):
                nod.addchild(val.copy())
            else:
                nod.adddata(val)
        return nod

    def delete(self):
        """delete() -> None

        Destroy a Node and its children. You don't have to do this, but it
        gets rid of the object without relying on garbage collection.
        """
        if self.parent:
            self.parent.removechild(self)
        # Iterate over a snapshot: each child's delete() removes it from
        # self.contents, which would otherwise make the loop skip siblings.
        for nod in list(self.contents):
            if isinstance(nod, Node):
                nod.delete()
        self.attrs = None
        self.contents = []
        self.parent = None

    def getname(self):
        """getname() -> str/unicode

        Return the Node's tag name.
        """
        return self.name

    def getnamespace(self, default=None):
        """getnamespace(default=None) -> ns

        Return the Node's namespace. If the Node has no namespace set for
        itself, this returns its parent's namespace, if it has a parent.
        If not, it returns None (or *default*).
        """
        # Walk up the ancestor chain until some node declares a namespace.
        nod = self
        while nod:
            if nod.namespace:
                return nod.namespace
            nod = nod.parent
        return default

    def setnamespace(self, ns):
        """setnamespace(ns) -> None

        Set (or clear) the Node's namespace. If *ns* is a string (or unicode),
        this sets the namespace. If *ns* is None (or ''), it clears it.

        If a Node's namespace is unset, it inherits the namespace of its
        parent (if it has a parent). Therefore, calling setnamespace(None)
        does not imply that getnamespace() will return None.
        """
        self.namespace = ns or None

    def getcontents(self):
        """getcontents() -> list

        Return the Node's list of Nodes and data. Data elements are
        represented as string/unicode objects. They are concatenated if
        possible -- you will not see two data elements in a row.
        """
        ls = []
        pending = []  # adjacent data chunks waiting to be merged
        for val in self.contents:
            if isinstance(val, Node):
                if pending:
                    ls.append(''.join(pending))
                    pending = []
                ls.append(val)
            else:
                pending.append(val)
        if pending:
            ls.append(''.join(pending))
        return ls

    def addchild(self, nod):
        """addchild(node) -> None

        Add *node* as a child of this Node. The *node* must not already
        have a parent (ValueError otherwise); it must be a Node (TypeError
        otherwise). If the Node already has child nodes or data, the new
        child is placed at the end.
        """
        if not isinstance(nod, Node):
            raise TypeError('children must be Nodes')
        if nod.parent:
            raise ValueError('child node already has a parent')
        self.contents.append(nod)
        nod.parent = self

    def removechild(self, nod, preservenamespace=False):
        """removechild(node, preservenamespace=False) -> Node

        Remove *node* as a child of this Node. Returns *node*. Raises
        TypeError if *node* is not a Node, ValueError if it is not a
        child of this Node.

        If *preservenamespace* is True, and *node* was inheriting its
        namespace from its parent, then *node* will get that namespace
        explicitly set. (So calling getnamespace() on it after removal
        will return the same value as before.)
        """
        if not isinstance(nod, Node):
            raise TypeError('children must be Nodes')
        if nod not in self.contents:
            raise ValueError('not a child of this Node')
        nod.remove(preservenamespace=preservenamespace)
        return nod

    def remove(self, preservenamespace=False):
        """remove(preservenamespace=False) -> None

        Remove this Node from its parent, if any. (If it had no parent,
        this method does nothing.)

        If *preservenamespace* is True, and the node was inheriting its
        namespace from its parent, then it will get that namespace
        explicitly set. (So calling getnamespace() on it after removal
        will return the same value as before.)
        """
        if not self.parent:
            return
        if preservenamespace and not self.namespace:
            # Capture the inherited value before the link is cut.
            self.setnamespace(self.getnamespace())
        self.parent.contents.remove(self)
        self.parent = None

    def getparent(self):
        """getparent() -> Node

        Return the Node's parent. If it had none, returns None.
        """
        return self.parent

    def setparent(self, nod):
        """setparent(node) -> None

        Add this Node as a child of *node*. This node must not already
        have a parent. If *node* already has child nodes or data, the new
        child is placed at the end.
        """
        nod.addchild(self)

    def getchild(self, name=None, attrs=None, namespace=None):
        """getchild(name=None, attrs=None, namespace=None) -> Node

        Return a child of the Node, according to criteria.

        If no parameters are supplied, this returns the first of the Node's
        child nodes. The parameters put conditions on the children which
        are considered:

            name=string: Only children whose tag names match.
            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)

        If there are no matches, None is returned.
        """
        return self.getchildren(name=name, attrs=attrs, namespace=namespace,
                                first=True)

    def getchildren(self, name=None, attrs=None, namespace=None, first=False):
        """getchildren(name=None, attrs=None, namespace=None, first=False)
            -> list

        Return some children of the Node, according to criteria.

        If no parameters are supplied, this returns all the Node's child
        nodes, as a list. The parameters filter the list:

            name=string: Only children whose tag names match.
            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*. The dict you pass in is not modified.)

        If *first* is True, the first matching child is returned, instead
        of a list of all matches. If there are no matches, None is returned.
        """
        if attrs and 'xmlns' in attrs:
            # Copy before popping: callers such as setchild() reuse the
            # same dict afterwards and still need the 'xmlns' entry.
            attrs = dict(attrs)
            ns = attrs.pop('xmlns')
            if not namespace:
                namespace = ns

        ls = [val for val in self.contents if isinstance(val, Node)]
        if name is not None:
            ls = [nod for nod in ls if nod.name == name]
        if namespace is not None:
            ls = [nod for nod in ls if nod.getnamespace() == namespace]
        if attrs:
            for key, val in attrs.items():
                ls = [nod for nod in ls if nod.attrs.get(key) == val]

        if not first:
            return ls
        if not ls:
            return None
        return ls[0]

    def getdata(self):
        """getdata() -> str/unicode

        Return all the data in the Node. If there are multiple elements,
        they are concatenated, ignoring the child Nodes between them.
        """
        return ''.join([val for val in self.contents
                        if not isinstance(val, Node)])

    def setdata(self, data):
        """setdata(data) -> None

        Change the data in the Node. Previously existing data elements
        are removed, and the new data is added. (After any child nodes,
        if there are any.)

        The *data* must be str, unicode, or convertible to unicode. (Or
        None, which is considered equivalent to ''.)
        """
        self.cleardata()
        self.adddata(data)

    def adddata(self, data):
        """adddata(data) -> None

        Append data to the Node. (After any child nodes, if there are any.)

        The *data* must be str, unicode, or convertible to unicode. (Or
        None, which is considered equivalent to ''.) Empty text is not
        stored. Passing a Node raises TypeError.
        """
        if data is None:
            return
        if isinstance(data, Node):
            raise TypeError('cannot pass a Node to adddata')
        if not isinstance(data, self._string_types):
            # Coerce first, then test for emptiness, so that falsy values
            # such as 0 are stored as '0' rather than silently dropped.
            data = self._text_type(data)
        if data:
            self.contents.append(data)

    def cleardata(self):
        """cleardata() -> None

        Clear all data from the Node. Previously existing data elements
        are removed; child Nodes are kept.
        """
        self.contents = [val for val in self.contents
                         if isinstance(val, Node)]

    def getattrs(self):
        """getattrs() -> dict

        Return all attributes in the Node. You should not modify the
        returned dict.

        The dict will not contain 'xmlns'. Call getnamespace() to see the
        Node's namespace.
        """
        return self.attrs

    def setattrs(self, arg_=None, **dic):
        """setattrs(val=None, **dic) -> None

        Set (or clear) several attributes in the Node. You get several
        options for setting this up:

            setattrs( fish='trout', fowl='dove' )
            setattrs( {'fish':'trout', 'fowl':'dove'} )
            setattrs( [ ('fish','trout'), ('fowl','dove') ] )
            setattrs(val)  # *val* is anything which can be cast to a dict

        Any key whose value is None (or '') causes the removal of that
        attribute (if present).

        The key 'xmlns' sets or clears the Node's namespace.
        """
        if arg_ is not None:
            dic.update(dict(arg_))
        for key, val in dic.items():
            self.setattr(key, val)

    def clearattrs(self):
        """clearattrs() -> None

        Clear all attributes from the Node. (Except namespace; this method
        does not affect that.)
        """
        self.attrs.clear()

    def getattr(self, key, default=None):
        """getattr(key, default=None) -> str/unicode

        Return one attribute in the Node. If the attribute is not present,
        returns None (or *default*).

        If *key* is 'xmlns', returns the namespace (which may be inherited).
        """
        if key == 'xmlns':
            return self.getnamespace(default)
        return self.attrs.get(key, default)

    def getattrdef(self, key, *default):
        """getattrdef(key [, default] ) -> str/unicode

        Return one attribute in the Node. If the attribute is not present,
        raises KeyError (unless *default* has been provided, in which case
        it returns that.).

        If *key* is 'xmlns', returns the namespace (which may be inherited).
        """
        if key == 'xmlns':
            return self.getnamespace(*default)
        if not default:
            return self.attrs[key]
        return self.attrs.get(key, *default)

    def hasattr(self, key):
        """hasattr(key) -> bool

        Test if the Node has a given attribute.

        If *key* is 'xmlns', returns whether the Node has a namespace.
        (Inherited namespaces count.)
        """
        if key == 'xmlns':
            return bool(self.getnamespace())
        return key in self.attrs

    def setattr(self, key, data):
        """setattr(key, data) -> None

        Set an attribute in the Node. The *data* must be str, unicode, or
        convertible to unicode. If *data* is None (or ''), this removes
        the attribute instead (if present).

        If *key* is 'xmlns', this sets or clears the namespace.

        Raises TypeError if *key* is not a string, ValueError if it
        contains a space.
        """
        if not isinstance(key, self._string_types):
            raise TypeError('key must be a string')
        if ' ' in key:
            raise ValueError('attribute names cannot contain spaces')

        if data is not None and not isinstance(data, self._string_types):
            # Coerce before the emptiness test so 0 becomes '0' instead
            # of being mistaken for "remove this attribute".
            data = self._text_type(data)

        if data:
            if key == 'xmlns':
                self.setnamespace(data)
            else:
                self.attrs[key] = data
        else:
            if key == 'xmlns':
                self.setnamespace(None)
            else:
                self.attrs.pop(key, None)

    def clearattr(self, key):
        """clearattr(key) -> None

        Clear an attribute from the Node.

        If *key* is 'xmlns', this clears the namespace.
        """
        if key == 'xmlns':
            self.setnamespace(None)
        else:
            self.attrs.pop(key, None)

    def setchild(self, name, attrs=None, namespace=None):
        """setchild(name, attrs=None, namespace=None) -> Node

        Find or create a child, according to criteria.

        This finds the first child node whose name is *name*. If there is
        no such node, it creates one. It then returns the node that was
        found or created.

        You can supply additional conditions for which children to
        consider:

            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)

        If a new node is created, these values are applied to it.
        """
        nod = self.getchild(name=name, attrs=attrs, namespace=namespace)
        if nod is None:
            nod = Node(name=name, attrs=attrs, namespace=namespace,
                       parent=self)
        return nod

    def getchilddata(self, name=None, attrs=None, namespace=None):
        """getchilddata(name=None, attrs=None, namespace=None) -> str/unicode

        Return the data from one child of the Node.

        This finds the first child node, and returns all its data.
        (If there are no children, it returns None.) If the child has
        multiple data elements, they are concatenated, ignoring the Nodes
        between them.

        You can supply additional conditions for which children to
        consider:

            name=string: Only children whose tag names match.
            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)
        """
        nod = self.getchild(name=name, attrs=attrs, namespace=namespace)
        if nod is None:
            return None
        return nod.getdata()

    def setchilddata(self, name, data='', attrs=None, namespace=None):
        """setchilddata(name, data='', attrs=None, namespace=None) -> Node

        Find or create a child, according to criteria, and set its data.

        This finds the first child node whose name is *name*. If there is
        no such node, it creates one. It then sets the child's character
        data to *data*. Finally, it returns the node that was found or
        created.

        You can supply additional conditions for which children to
        consider:

            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)

        If a new node is created, these values are applied to it.
        """
        nod = self.setchild(name=name, attrs=attrs, namespace=namespace)
        nod.setdata(data)
        return nod

    def addchilddata(self, name, data='', attrs=None, namespace=None):
        """addchilddata(name, data='', attrs=None, namespace=None) -> Node

        Find or create a child, according to criteria, and append to its
        data.

        This finds the first child node whose name is *name*. If there is
        no such node, it creates one. It then appends *data* to the child's
        character data. Finally, it returns the node that was found or
        created.

        You can supply additional conditions for which children to
        consider:

            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)

        If a new node is created, these values are applied to it.
        """
        nod = self.setchild(name=name, attrs=attrs, namespace=namespace)
        nod.adddata(data)
        return nod

    def getchildattr(self, key, default=None,
                     name=None, attrs=None, namespace=None):
        """getchildattr(key, default=None, name=None, attrs=None,
            namespace=None) -> str/unicode

        Return an attribute from one child of the Node.

        This finds the first child node, and returns the value of its
        *key* attribute. (If there is no such attribute, this returns
        None, or *default*. If there are no children, same thing.)

        You can supply additional conditions for which children to
        consider:

            name=string: Only children whose tag names match.
            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)
        """
        nod = self.getchild(name=name, attrs=attrs, namespace=namespace)
        if nod is None:
            return default
        return nod.getattr(key, default)

    def getchildattrdef(self, key, default=KeyError,
                        name=None, attrs=None, namespace=None):
        """getchildattrdef(key, default=KeyError, name=None, attrs=None,
            namespace=None) -> str/unicode

        Return an attribute from one child of the Node.

        This finds the first child node, and returns the value of its
        *key* attribute. (If there is no such attribute, this raises
        KeyError -- unless *default* is supplied, in which case it
        returns that. If there are no children, same thing.)

        You can supply additional conditions for which children to
        consider:

            name=string: Only children whose tag names match.
            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)
        """
        # The KeyError class itself is the "no default supplied" sentinel,
        # so it is compared by identity.
        nodefault = (default is KeyError)
        nod = self.getchild(name=name, attrs=attrs, namespace=namespace)
        if nod is None:
            if nodefault:
                raise KeyError('no such child')
            return default
        if nodefault:
            return nod.getattrdef(key)
        return nod.getattrdef(key, default)

    def setchildattr(self, name, key, data,
                     attrs=None, namespace=None):
        """setchildattr(name, key, data, attrs=None, namespace=None) -> Node

        This finds the first child node whose name is *name*. If there is
        no such node, it creates one. It then sets the child's *key*
        attribute to *data*. (If *data* is None or '', it instead
        removes the *key* attribute, if present.) Finally, it returns
        the node that was found or created.

        You can supply additional conditions for which children to
        consider:

            attrs=dict: Only children who match each of the given attributes.
            namespace=string: Only children whose namespace matches.
                (Inherited namespaces count.)

        (You can also match the namespace by putting an 'xmlns' key in
        *attrs*.)

        If a new node is created, these values are applied to it.
        """
        nod = self.setchild(name=name, attrs=attrs, namespace=namespace)
        nod.setattr(key, data)
        return nod

    def escapetext(st):
        """escapetext(str) -> str

        Escape <, >, ", and & symbols in a string or unicode object.
        """
        # '&' must be handled first, or the ampersands introduced by the
        # other replacements would themselves be escaped again.
        st = st.replace('&', '&amp;')
        st = st.replace('"', '&quot;')
        st = st.replace('<', '&lt;')
        st = st.replace('>', '&gt;')
        return st
    escapetext = staticmethod(escapetext)

    def serialize(self, pretty=False, depth=0, namespace=None):
        """serialize(pretty=False) -> str/unicode

        Create a string representing the Node as XML. The result will
        be str if possible, otherwise unicode. (In practice, usually
        unicode.)

        If *pretty* is True, the string will contain pretty indentation and
        line breaks. (It will not end with a linebreak.) Pretty mode also
        skips printing whitespace between tags.

        The *depth* and *namespace* arguments are used by the recursion:
        *depth* is the nesting level of this Node, and *namespace* is the
        namespace already declared by the enclosing output, so that an
        xmlns attribute is only written where the namespace changes.
        """
        st = []
        if pretty:
            st.append(' ' * depth)
        st.extend(['<', self.name])

        # The outermost tag must declare even an inherited namespace;
        # nested tags only declare one they set for themselves.
        if depth == 0:
            selfns = self.getnamespace()
        else:
            selfns = self.namespace
        if selfns and namespace != selfns:
            st.extend([' xmlns="', self.escapetext(selfns), '"'])
            namespace = selfns

        for key, val in self.attrs.items():
            st.extend([' ', key, '="', self.escapetext(val), '"'])

        if not self.contents:
            st.append(' />')
            return ''.join(st)

        st.append('>')
        ls = self.getcontents()
        # A tag holding nothing but one run of text stays on a single line.
        prettysquish = (len(ls) == 1 and not isinstance(ls[0], Node))
        multiline = (pretty and not prettysquish)
        if multiline:
            st.append('\n')
        for val in ls:
            if isinstance(val, Node):
                st.append(val.serialize(pretty, depth+1, namespace))
                if pretty:
                    st.append('\n')
                continue
            if multiline:
                val = val.strip()
                if not val:
                    continue
                st.append(' ' * (depth+1))
            st.append(self.escapetext(val))
            if multiline:
                st.append('\n')
        if multiline:
            st.append(' ' * depth)
        st.extend(['</', self.name, '>'])
        return ''.join(st)

    def parse(data, encoding='UTF-8'):
        """parse(data, encoding='UTF-8') -> Node

        Parse a string into a Node tree. If the string is not well-formed
        XML, this raises xml.parsers.expat.ExpatError.

        By default, *encoding* is UTF-8, and so the *data* must be a string
        in that encoding. You can supply other encodings. If *encoding* is
        None, the *data* should be a unicode object (or convertible to
        unicode).
        """
        if encoding is None:
            # Text input: encode to UTF-8 and parse it as such.
            encoding = 'UTF-8'
            data = Node._text_type(data).encode(encoding)
        parser = StaticNodeGenerator(data, encoding=encoding)
        nod = parser.get()
        parser.close()
        return nod
    parse = staticmethod(parse)

    def parsefile(file, encoding='UTF-8'):
        """parsefile(file, encoding='UTF-8') -> Node

        Parse the contents of a file into a Node tree. If the data is not
        well-formed XML, this raises xml.parsers.expat.ExpatError.

        By default, *encoding* is UTF-8, and so the *file* must contain data
        in that encoding. You can supply other encodings; None is rejected
        with an Exception.

        This leaves *file* open, but at EOF.
        """
        if encoding is None:
            raise Exception('you must give an encoding')
        parser = FileNodeGenerator(file, encoding=encoding)
        nod = parser.get()
        parser.close()
        return nod
    parsefile = staticmethod(parsefile)
class NodeGenerator:
    """NodeGenerator: A base class for utilities which parse strings into
    Node trees.

    This class is not useful on its own. See StaticNodeGenerator for a
    functional use of it.

    NodeGenerator(encoding='UTF-8') -- constructor.

    By default, *encoding* is UTF-8, and so the data must be a string
    in that encoding. You can supply other encodings. If *encoding* is
    None, the data should be unicode objects (or convertible to
    unicode).

    Public method:

    close() -- shut down the NodeGenerator.

    Internal methods:

    handle_startnamespace() -- expat parser callback.
    handle_startelement() -- expat parser callback.
    handle_endelement() -- expat parser callback.
    handle_data() -- expat parser callback.
    """

    def __init__(self, encoding='UTF-8'):
        self.result = None    # root Node of the tree, once seen
        self.curnode = None   # Node whose contents are being parsed
        self.depth = 0        # number of currently open elements
        # Maps namespace URI -> prefix, used to rebuild 'prefix:name'
        # attribute keys. The 'xml' prefix is predeclared.
        self.namespaces = { 'http://www.w3.org/XML/1998/namespace' : 'xml' }

        # With a separator, expat reports namespaced names as 'uri name'.
        self.parser = xml.parsers.expat.ParserCreate(encoding=encoding,
                                                     namespace_separator=' ')
        try:
            self.parser.returns_unicode = True
        except AttributeError:
            # Parsers without this attribute (it was removed in
            # Python 3.3) always deliver text strings.
            pass
        self.parser.StartNamespaceDeclHandler = self.handle_startnamespace
        self.parser.StartElementHandler = self.handle_startelement
        self.parser.EndElementHandler = self.handle_endelement
        self.parser.CharacterDataHandler = self.handle_data

    def close(self):
        """close() -> None

        Shut down the NodeGenerator. You don't have to do this, but it
        gets rid of the object without relying on garbage collection.
        """
        # Unhook the bound-method callbacks; they reference self.
        self.parser.StartNamespaceDeclHandler = None
        self.parser.StartElementHandler = None
        self.parser.EndElementHandler = None
        self.parser.CharacterDataHandler = None
        self.result = None
        self.parser = None

    def handle_startnamespace(self, prefix, uri):
        """handle_startnamespace() -- expat parser callback. Do not call.
        """
        self.namespaces[uri] = prefix

    def handle_startelement(self, name, attrs):
        """handle_startelement() -- expat parser callback. Do not call.
        """
        # Rewrite 'uri name' attribute keys as 'prefix:name'. Iterate over
        # a snapshot of the keys, because the dict is modified in the loop.
        for key in list(attrs):
            pos = key.find(' ')
            if pos >= 0:
                val = attrs.pop(key)
                keyns = key[ : pos ]
                keyname = key[ pos+1 : ]
                attrs[self.namespaces[keyns] + ':' + keyname] = val

        # Split a namespaced tag name into (namespace URI, local name).
        namespace = None
        pos = name.find(' ')
        if pos >= 0:
            namespace = name[ : pos ]
            name = name[ pos+1 : ]

        if self.depth == 0:
            if self.result:
                raise Exception('got top-level StartElement twice')
            self.result = Node(name, attrs, namespace=namespace)
            self.curnode = self.result
        else:
            # Creating the Node with a parent also attaches it there.
            self.curnode = Node(name, attrs, namespace=namespace,
                                parent=self.curnode)
        self.depth += 1

    def handle_endelement(self, name):
        """handle_endelement() -- expat parser callback. Do not call.
        """
        pos = name.find(' ')
        if pos >= 0:
            name = name[ pos+1 : ]
        if name != self.curnode.getname():
            raise Exception('EndElement name did not match StartElement')
        self.curnode = self.curnode.getparent()
        self.depth -= 1

    def handle_data(self, data):
        """handle_data() -- expat parser callback. Do not call.
        """
        self.curnode.adddata(data)
class StaticNodeGenerator(NodeGenerator):
    """StaticNodeGenerator: A NodeGenerator which parses a single string.

    StaticNodeGenerator(data, encoding='UTF-8') -- constructor.

    By default, *encoding* is UTF-8, and so the *data* must be a string
    in that encoding. You can supply other encodings. If *encoding* is
    None, the *data* should be a unicode object (or convertible to
    unicode).

    Public method:

    get() -- return the Node tree.
    """

    def __init__(self, data, encoding='UTF-8'):
        # The string is only stored here; parsing is deferred to get().
        self.staticdata = data
        self.parsed = False
        NodeGenerator.__init__(self, encoding=encoding)

    def get(self):
        """get() -> Node

        Return the Node tree, parsing the stored string on the first
        call. If the data passed to the constructor was not well-formed,
        this raises xml.parsers.expat.ExpatError.
        """
        if self.parsed:
            return self.result
        # Hand the whole string to expat as the final chunk.
        self.parser.Parse(self.staticdata, True)
        self.parsed = True
        return self.result
class FileNodeGenerator(NodeGenerator):
    """FileNodeGenerator: A NodeGenerator which parses the contents of a
    file.

    FileNodeGenerator(file, encoding='UTF-8') -- constructor.

    By default, *encoding* is UTF-8, and so the *file* must contain data
    in that encoding. You can supply other encodings. If *encoding* is
    None, bad things will probably happen.

    Public method:

    get() -- return the Node tree.
    """

    def __init__(self, file, encoding='UTF-8'):
        self.file = file
        NodeGenerator.__init__(self, encoding=encoding)

    def get(self):
        """get() -> Node

        Return the Node tree. This parses the entire file, reading it in
        one line at a time. If the data is not well-formed, this raises
        xml.parsers.expat.ExpatError.
        """
        # Feed expat one line per call until readline() reports EOF.
        chunk = self.file.readline()
        while chunk:
            self.parser.Parse(chunk)
            chunk = self.file.readline()
        # An empty final chunk tells the parser the document is complete.
        self.parser.Parse('', True)
        return self.result
# ------------------- unit tests -------------------
class TestXMLData(unittest.TestCase):
"""Unit tests for the xmldata module.
"""
def test_data(self):
    """Character data can be added, replaced, cleared and coerced."""
    node = Node('tag')
    self.assertEqual(node.getdata(), '')

    # adddata() appends; setdata() replaces.
    node.adddata('frog')
    self.assertEqual(node.getdata(), 'frog')
    node.setdata('hello')
    self.assertEqual(node.getdata(), 'hello')
    node.adddata('goodbye')
    self.assertEqual(node.getdata(), 'hellogoodbye')

    # Non-ASCII text is accepted alongside plain strings.
    node.adddata(u'b\xe9d')
    self.assertEqual(node.getdata(), u'hellogoodbyeb\xe9d')

    node.cleardata()
    self.assertEqual(node.getdata(), '')

    # Non-string data is converted to text.
    node.adddata(117)
    self.assertEqual(node.getdata(), '117')

    # Data may also be supplied to the constructor.
    node = Node('tag', data='string')
    self.assertEqual(node.getdata(), 'string')
def test_contents(self):
    """getcontents() merges adjacent data and keeps child order."""
    parent = Node('tag')
    # Data before the child; the empty string contributes nothing.
    parent.adddata('11')
    parent.adddata('')
    parent.adddata('22')
    child = Node('child')
    parent.addchild(child)
    # Data after the child.
    parent.adddata('33')
    parent.adddata('44')

    contents = parent.getcontents()
    self.assertEqual(contents, ['1122', child, '3344'])
def test_child(self):
    """Children can be attached, detached and deleted in several ways."""
    # Attach through the child's constructor (parent=...).
    parent = Node('tag')
    child = Node('child', parent=parent)
    parent.cleardata()
    self.assertEqual(parent.getchildren(), [child])
    self.assertEqual(child.parent, parent)
    parent.removechild(child)
    self.assertEqual(parent.getchildren(), [])
    self.assertEqual(child.parent, None)

    # Attach through addchild(); the parent's data is left alone.
    parent = Node('tag', data='hello')
    child = Node('child')
    parent.addchild(child)
    self.assertEqual(parent.getchildren(), [child])
    self.assertEqual(child.parent, parent)
    parent.removechild(child)
    self.assertEqual(parent.getchildren(), [])
    self.assertEqual(child.parent, None)
    self.assertEqual(parent.getdata(), 'hello')

    # Attach through the parent's constructor, as a list.
    child = Node('child')
    parent = Node('tag', children=[child])
    self.assertEqual(parent.getchildren(), [child])
    self.assertEqual(child.parent, parent)

    # Attach through the parent's constructor, as a bare Node.
    child = Node('child')
    parent = Node('tag', children=child)
    self.assertEqual(parent.getchildren(), [child])
    self.assertEqual(child.parent, parent)

    # Deleting the child detaches it from its parent.
    child.delete()
    self.assertEqual(parent.getchildren(), [])
    self.assertEqual(child.parent, None)
def test_namespace(self):
    """Namespaces can be set, cleared, inherited and preserved.

    Uses assertTrue/assertFalse rather than the deprecated assert_
    alias, which newer unittest versions no longer provide.
    """
    # No namespace at all: the default is returned.
    nod = Node('tag')
    self.assertEqual(nod.getnamespace(), None)
    self.assertEqual(nod.getnamespace('cheese'), 'cheese')
    self.assertFalse(nod.hasattr('xmlns'))

    # Explicit namespace, then changed.
    nod = Node('tag', namespace='frog')
    self.assertEqual(nod.getnamespace(), 'frog')
    self.assertTrue(nod.hasattr('xmlns'))
    nod.setnamespace('toad')
    self.assertEqual(nod.getnamespace(), 'toad')
    self.assertTrue(nod.hasattr('xmlns'))

    # A child inherits; clearing its own namespace falls back to the
    # parent's.
    nod = Node('tag', namespace='frog')
    nod2 = Node('child', parent=nod)
    self.assertEqual(nod2.getnamespace(), 'frog')
    self.assertTrue(nod2.hasattr('xmlns'))
    nod2.setnamespace('toad')
    self.assertEqual(nod2.getnamespace(), 'toad')
    nod2.setnamespace(None)
    self.assertEqual(nod2.getnamespace(), 'frog')
    nod2.setnamespace('')
    self.assertEqual(nod2.getnamespace(), 'frog')
    self.assertTrue(nod2.hasattr('xmlns'))

    # remove(True) keeps the inherited namespace on the detached node.
    nod = Node('tag', namespace='frog')
    nod2 = Node('child')
    self.assertEqual(nod2.getnamespace(), None)
    nod2.setparent(nod)
    self.assertEqual(nod2.getnamespace(), 'frog')
    nod2.remove(True)
    self.assertEqual(nod2.getnamespace(), 'frog')

    # remove(False) drops it.
    nod2 = Node('child')
    self.assertEqual(nod2.getnamespace(), None)
    nod2.setparent(nod)
    self.assertEqual(nod2.getnamespace(), 'frog')
    nod2.remove(False)
    self.assertEqual(nod2.getnamespace(), None)
def test_attrs(self):
    """Attributes can be read, set, cleared and coerced to text.

    Uses assertTrue/assertFalse rather than the deprecated assert_
    alias, which newer unittest versions no longer provide.
    """
    # Missing attribute: defaults are returned, or KeyError is raised.
    nod = Node('tag')
    val = nod.getattrdef('key', None)
    self.assertEqual(val, None)
    val = nod.getattr('key')
    self.assertEqual(val, None)
    val = nod.getattrdef('key', 'none')
    self.assertEqual(val, 'none')
    val = nod.getattr('key', 'none')
    self.assertEqual(val, 'none')
    self.assertRaises(KeyError, nod.getattrdef, 'key')
    self.assertFalse(nod.hasattr('key'))

    # Present attribute: defaults are ignored.
    nod.setattr('frog', 'toad')
    val = nod.getattrdef('frog')
    self.assertEqual(val, 'toad')
    val = nod.getattr('frog')
    self.assertEqual(val, 'toad')
    self.assertTrue(nod.hasattr('frog'))
    val = nod.getattrdef('frog', 'lizard')
    self.assertEqual(val, 'toad')
    val = nod.getattr('frog', 'lizard')
    self.assertEqual(val, 'toad')
    self.assertEqual(nod.getattrs(), {'frog':'toad'})

    # Setting '' removes the attribute, as does clearattr().
    nod.setattr('frog', '')
    val = nod.getattrdef('frog', None)
    self.assertEqual(val, None)
    nod.setattr('frog', 'toad')
    nod.clearattr('frog')
    val = nod.getattrdef('frog', None)
    self.assertEqual(val, None)

    # setattrs() with keywords, then with a dict (which adds to the
    # existing attributes).
    nod.setattrs(newt='newtskin', lizard='scale')
    val = nod.getattrdef('newt')
    self.assertEqual(val, 'newtskin')
    val = nod.getattrdef('lizard')
    self.assertEqual(val, 'scale')
    nod.setattrs( {'worm':'wormcast'} )
    val = nod.getattrdef('newt')
    self.assertEqual(val, 'newtskin')
    val = nod.getattrdef('worm')
    self.assertEqual(val, 'wormcast')

    # clearattrs() removes everything.
    nod.clearattrs()
    self.assertRaises(KeyError, nod.getattrdef, 'newt')
    self.assertRaises(KeyError, nod.getattrdef, 'lizard')
    self.assertRaises(KeyError, nod.getattrdef, 'worm')
    val = nod.getattr('newt')
    self.assertEqual(val, None)
    val = nod.getattr('lizard')
    self.assertEqual(val, None)
    val = nod.getattr('worm')
    self.assertEqual(val, None)

    # Attributes from the constructor; non-string and non-ASCII values.
    nod = Node('tag', attrs={'frog':'toad'})
    val = nod.getattrdef('frog')
    self.assertEqual(val, 'toad')
    nod.setattr('frog', 17)
    val = nod.getattrdef('frog')
    self.assertEqual(val, '17')
    nod.setattr('frog', u'h\xe9llo')
    val = nod.getattrdef('frog')
    self.assertEqual(val, u'h\xe9llo')
def test_namespaceattr(self):
    """Check that the 'xmlns' attribute is routed to the Node's namespace.

    Setting 'xmlns' (via setattr(), the constructor's *attrs*, or
    setattrs()) must change getnamespace() without showing up in
    getattrs(); conversely, a namespace set with setnamespace() is
    readable through getattrdef('xmlns').
    """
    node = Node('tag')
    node.setattr('xmlns', 'frog')
    self.assertEqual(node.getattrs(), {})
    self.assertEqual(node.getnamespace(), 'frog')

    # Same behavior when 'xmlns' arrives mixed in with ordinary attributes.
    node = Node('tag', attrs={'xmlns':'frog', 'newt':'lizard'})
    self.assertEqual(node.getattrs(), {'newt':'lizard'})
    self.assertEqual(node.getnamespace(), 'frog')

    # An empty 'xmlns' value leaves the node with no namespace.
    node.clearattrs()
    node.setattr('xmlns', '')
    self.assertEqual(node.getattrs(), {})
    self.assertEqual(node.getnamespace(), None)

    node.setattrs(xmlns='toad', newt='lizard')
    self.assertEqual(node.getattrs(), {'newt':'lizard'})
    self.assertEqual(node.getnamespace(), 'toad')

    # The reverse direction: the namespace is visible as 'xmlns'.
    node.clearattrs()
    node.setnamespace('worm')
    self.assertEqual(node.getattrdef('xmlns'), 'worm')
def test_getchildren(self):
    """Check getchildren() / getchild() filtering.

    Builds one parent with eight child nodes (plus some character data)
    and verifies selection by name, namespace, attributes, combinations
    of those, and the first=True form.
    """
    root = Node('tag')
    child_a = Node('child', parent=root)
    child_b = Node('child', parent=root)
    child_frog = Node('child', parent=root, attrs={'frog':'toad'})
    egg_toad = Node('egg', parent=root, attrs={'frog':'toad'})
    egg_lizard = Node('egg', parent=root, attrs={'frog':'lizard'})
    egg_key = Node('egg', parent=root, attrs={'key':'toad'})
    html_child = Node('child', parent=root, namespace='html')
    html_thing = Node('thing', parent=root, namespace='html',
        attrs={'key':'toad'})
    # The data must not appear among the children returned below.
    root.setdata('I am the data')

    # No filter: every child node, in insertion order.
    self.assertEqual(
        root.getchildren(),
        [child_a, child_b, child_frog, egg_toad, egg_lizard, egg_key,
         html_child, html_thing])
    self.assertEqual(root.getchildren(first=True), child_a)

    # No match: an empty list, or None when first=True.
    self.assertEqual(root.getchildren(name='blarg'), [])
    self.assertEqual(root.getchildren(first=True, name='blarg'), None)

    # Filter by name (str or unicode).
    self.assertEqual(
        root.getchildren(name='child'),
        [child_a, child_b, child_frog, html_child])
    self.assertEqual(
        root.getchildren(name=u'egg'),
        [egg_toad, egg_lizard, egg_key])

    # Filter by namespace, alone and combined with a name.
    self.assertEqual(
        root.getchildren(namespace='html'),
        [html_child, html_thing])
    self.assertEqual(
        root.getchildren(name='child', namespace='html'),
        [html_child])

    # Filter by attributes, alone and combined with a name.
    self.assertEqual(
        root.getchildren(attrs={'frog':'toad'}),
        [child_frog, egg_toad])
    self.assertEqual(
        root.getchildren(name='egg', attrs={'frog':'toad'}),
        [egg_toad])
    self.assertEqual(root.getchildren(attrs={'spaz':'toad'}), [])

    # An 'xmlns' attribute filter selects by namespace.
    self.assertEqual(
        root.getchildren(attrs={'xmlns':'html'}),
        [html_child, html_thing])

    # getchild() returns the first match.
    self.assertEqual(root.getchild(name='egg'), egg_toad)
def test_setchild(self):
    """Check the setchild() family of convenience methods.

    Covers setchild(), setchilddata(), addchilddata() and setchildattr(),
    including reuse of an existing matching child, creation of a new
    child when the namespace differs, chaining, and passing a namespace
    or an attribute dict to setchild().
    """
    root = Node('tag')
    child = root.setchild('child')
    self.assertEqual(child.getname(), 'child')
    self.assertEqual(child.getparent(), root)

    # setchilddata() reuses the existing 'child'; doing it a second time
    # with the same data gives the same node and the same data.
    for _ in range(2):
        result = root.setchilddata('child', 'hello')
        self.assertEqual(child, result)
        self.assertEqual(child.getdata(), 'hello')

    # addchilddata() appends to the same 'more' child on repeated calls.
    more_first = root.addchilddata('more', 'good')
    more_second = root.addchilddata('more', 'bye')
    self.assertEqual(more_first, more_second)
    self.assertNotEqual(child, more_first)
    self.assertEqual(more_first.getdata(), 'goodbye')

    # A different namespace yields a node other than the plain 'child'.
    html_child = root.setchildattr('child', 'frog', 'toad', namespace='html')
    self.assertNotEqual(child, html_child)
    self.assertEqual(html_child.getnamespace(), 'html')
    self.assertEqual(html_child.getattrdef('frog'), 'toad')

    # Calls can be chained to build a nested structure.
    pong = root.setchild('ping').setchilddata('pong', 'data')
    self.assertEqual(pong.getdata(), 'data')
    self.assertEqual(pong.getname(), 'pong')
    ping = pong.getparent()
    self.assertEqual(ping.getname(), 'ping')
    self.assertEqual(pong.getparent().getparent(), root)

    query = root.setchild('query', namespace='jabber')
    self.assertEqual(query.getname(), 'query')
    self.assertEqual(query.getnamespace(), 'jabber')

    # The second positional argument of setchild() is an attribute dict.
    reply = root.setchild('reply', {'lizard':'snake'})
    self.assertEqual(reply.getname(), 'reply')
    self.assertEqual(reply.getattrdef('lizard'), 'snake')
def test_serialize(self):
    """Check Node.serialize() output as a tree is built up step by step.

    The tree gains an attribute, a child, character data, a second child
    and a namespace, with serialize() compared after each step; then
    serialize(True) is compared against a multi-line string; finally data
    containing quotes and '&', and nested children with mixed namespaces,
    are serialized.
    """
    # NOTE(review): the expected strings below contain no XML markup
    # (several are empty), which looks like the tags were stripped from
    # this copy of the file -- confirm against the canonical source.
    root = Node('tag')
    self.assertEqual(root.serialize(), '')

    root.setattr('frog', 'frogskin')
    self.assertEqual(root.serialize(), '')

    child = Node('child')
    child.setparent(root)
    self.assertEqual(root.serialize(), '')

    # Data added in two pieces is serialized as one run.
    for piece in ('hel', 'lo'):
        root.adddata(piece)
    self.assertEqual(root.serialize(), 'hello')

    root.addchild(Node('more'))
    self.assertEqual(root.serialize(), 'hello')

    root.setnamespace('html')
    self.assertEqual(root.serialize(), 'hello')

    # Grow the tree further and serialize with the optional argument set.
    child.setdata('goodbye')
    yetmore = Node('yetmore')
    yetmore.addchild(Node('more'))
    root.addchild(yetmore)
    self.assertEqual(root.serialize(True),
"""
goodbye
hello
""")

    # Data containing quote and ampersand characters.
    quoted = Node('tag')
    quoted.setdata('hi. "Hi." this&that.')
    self.assertEqual(quoted.serialize(), 'hi. "Hi." <tag> this&that.')

    # Children whose namespaces match or differ from their parents'.
    outer = Node('tag', namespace='html')
    one = outer.setchild('one', namespace='svg')
    one.setchild('onex')
    one.setchild('oney', namespace='html')
    one.setchild('onez', namespace='svg')
    outer.setchild('two', namespace='html')
    self.assertEqual(outer.serialize(), '')

    # Serialize the same tree again with the root's namespace cleared.
    outer.setnamespace(None)
    self.assertEqual(outer.serialize(), '')
def test_parse(self):
    """Check Node.parse() on valid, non-ASCII and malformed input.

    Valid inputs must survive a parse() / serialize() round trip
    unchanged; malformed inputs must raise xml.parsers.expat.ExpatError.
    """
    # NOTE(review): many literals in this test are empty or carry no
    # markup even though a 'tag' element is expected back -- they look
    # like they lost their XML tags in this copy; confirm against the
    # canonical source.
    parsed = Node.parse('')
    self.assertEqual(parsed.getname(), 'tag')
    self.assertEqual(parsed.getcontents(), [])

    text = u'h\xe9llo'

    # UTF-8 encoded input (the encoder returns a (data, length) pair).
    encoded = codecs.getencoder('UTF-8')(text)[0]
    parsed = Node.parse(encoded)
    self.assertEqual(parsed.getname(), 'tag')
    self.assertEqual(parsed.getdata(), u'h\xe9llo')

    # Unicode input passed directly, with encoding=None.
    parsed = Node.parse(text, encoding=None)
    self.assertEqual(parsed.getname(), 'tag')
    self.assertEqual(parsed.getdata(), u'h\xe9llo')

    malformed = [
        '',
        'data',
        '',
        'more',
        '',
    ]
    wellformed = [
        '',
        'data',
        'data',
        'datamore',
        'datamore',
        'X>Y<Z&Q',
    ]

    for source in malformed:
        self.assertRaises(xml.parsers.expat.ExpatError, Node.parse, source)

    # Each well-formed input must serialize back to exactly itself.
    for source in wellformed:
        roundtrip = Node.parse(source).serialize()
        self.assertEqual(source, roundtrip)
def test_parsenamespaces(self):
    """Check namespace handling when parsing.

    Each case is a tuple (input string, expected serialize() output,
    expected namespaces). After parsing, the expected namespaces are
    compared one by one against the root and then against each node
    reached by repeatedly calling getchild(). A final input must be
    rejected with xml.parsers.expat.ExpatError.
    """
    # NOTE(review): the input/output literals below are all empty, which
    # looks like the XML markup was stripped from this copy of the file --
    # confirm against the canonical source.
    # Named 'cases' rather than 'list' so the builtin is not shadowed.
    cases = [
        (''
         + '',
         '',
         ['nsuri', 'nsuri'] ),
        (''
         + '',
         '',
         ['nsuri', 'defuri'] ),
        (''
         + '',
         ''
         + '',
         ['nsuri', 'defuri', 'defuri'] ),
        (''
         + '',
         '',
         ['nsuri', 'defuri'] ),
    ]

    for (stin, stout, urilist) in cases:
        nod = Node.parse(stin)
        st = nod.serialize()
        self.assertEqual(st, stout)
        # Walk down the chain of children, consuming one expected
        # namespace per node, until getchild() returns something falsy.
        while nod:
            self.assertEqual(nod.getnamespace(), urilist.pop(0))
            nod = nod.getchild()

    st = (''
          + '')
    self.assertRaises(xml.parsers.expat.ExpatError, Node.parse, st)
def test_copy(self):
    """Check Node.copy().

    For each sample input, the copy must compare unequal to the original
    while both serialize back to the input string. A copy of a child node
    must serialize the same as the child but have no parent.
    """
    # NOTE(review): several sample literals are empty or carry no markup,
    # which looks like the XML tags were stripped from this copy of the
    # file -- confirm against the canonical source.
    # Named 'samples' rather than 'list' so the builtin is not shadowed.
    samples = [
        'hello',
        'foohellobar ',
        '',
        '',
        '',
    ]
    for st in samples:
        nod = Node.parse(st)
        nod2 = nod.copy()
        self.assertNotEqual(nod, nod2)
        self.assertEqual(st, nod.serialize())
        self.assertEqual(st, nod2.serialize())

    # Copying a child whose parent carries the namespace: the copy is
    # detached yet serializes identically to the original child.
    nod = Node('tag', namespace='parenturi')
    nod2 = Node('child', parent=nod)
    nod3 = nod2.copy()
    self.assertEqual(nod2.serialize(), nod3.serialize())
    # assertFalse replaces the assert_ alias, which newer unittest
    # versions no longer provide.
    self.assertFalse(nod3.getparent())