Prevent DoS through XML entity expansion

Add a ProtectedXMLParser that overrides the
doctype declaration handler. The handler simply
raises an exception, preventing any further
parsing of the incoming XML.

Fixes LP Bug #1100282

Change-Id: I6488e1a6a52326006e7e7927ece5b5939b72e83e
This commit is contained in:
Davanum Srinivas 2013-02-26 15:43:50 -05:00
parent 5376c174bd
commit 6446b4ecd2
2 changed files with 39 additions and 1 deletions

View File

@ -175,6 +175,27 @@ class XMLDeserializerTest(testtools.TestCase):
self.assertRaises( self.assertRaises(
exception.MalformedRequestBody, deserializer.default, data_string) exception.MalformedRequestBody, deserializer.default, data_string)
def test_entity_expansion(self):
    """A request body that declares an inline DTD is rejected with ValueError."""
    template = """<!DOCTYPE x [
<!ENTITY a "%(a)s">
<!ENTITY b "%(b)s">
<!ENTITY c "%(c)s">]>
<foo>
<bar>
<v1>%(d)s</v1>
</bar>
</foo>"""
    # Each entity is ten copies of the previous one, and the element body
    # references the last entity 9999 times.
    expansions = {
        'a': 'A' * 10,
        'b': '&a;' * 10,
        'c': '&b;' * 10,
        'd': '&c;' * 9999,
    }
    payload = (template % expansions).strip()

    deserializer = wsgi.XMLDeserializer()
    self.assertRaises(ValueError, deserializer.default, payload)
class JSONDeserializerTest(testtools.TestCase): class JSONDeserializerTest(testtools.TestCase):
def test_default_raise_Maiformed_Exception(self): def test_default_raise_Maiformed_Exception(self):

View File

@ -452,6 +452,18 @@ class JSONDeserializer(TextDeserializer):
return {'body': self._from_json(datastring)} return {'body': self._from_json(datastring)}
class _ForbidDTDTreeBuilder(etree.TreeBuilder):
    """Tree builder that refuses any document carrying a DOCTYPE."""

    def doctype(self, name, pubid, system):
        raise ValueError(_("Inline DTD forbidden"))


class ProtectedXMLParser(etree.XMLParser):
    """XMLParser that aborts as soon as a DOCTYPE declaration is seen.

    Rejecting the DOCTYPE prevents entity declarations in an inline DTD
    from ever being used, which blocks entity-expansion attacks.

    Two hooks are installed because XMLParser implementations differ:

    * when the underlying expat parser is exposed as ``_parser``, its
      ``StartDoctypeDeclHandler`` is pointed at ``start_doctype_decl``;
    * when the caller did not supply a target, a tree builder whose
      ``doctype()`` raises is used, which covers implementations (such
      as the C-accelerated parser) that do not expose ``_parser``.

    Raises:
        ValueError: from ``feed()``/``close()`` when the document
            contains a DOCTYPE declaration.
    """

    def __init__(self, *args, **kwargs):
        # Only inject the guarding target when the caller passed none,
        # either by keyword or as the second positional argument.
        if len(args) < 2 and kwargs.get('target') is None:
            kwargs['target'] = _ForbidDTDTreeBuilder()
        etree.XMLParser.__init__(self, *args, **kwargs)
        # ``_parser`` is a private attribute and is not always present,
        # so hook expat directly only when it is available.
        expat_parser = getattr(self, '_parser', None)
        if expat_parser is not None:
            expat_parser.StartDoctypeDeclHandler = self.start_doctype_decl

    def start_doctype_decl(self, name, sysid, pubid, internal):
        """Expat DOCTYPE-start callback: always reject the document."""
        raise ValueError(_("Inline DTD forbidden"))

    def doctype(self, name, pubid, system):
        """Parser-level DOCTYPE callback: always reject the document."""
        raise ValueError(_("Inline DTD forbidden"))
class XMLDeserializer(TextDeserializer): class XMLDeserializer(TextDeserializer):
def __init__(self, metadata=None): def __init__(self, metadata=None):
@ -493,12 +505,17 @@ class XMLDeserializer(TextDeserializer):
node.remove(link) node.remove(link)
return link_list and {link_key: link_list} or {} return link_list and {link_key: link_list} or {}
def _parseXML(self, text):
    """Parse ``text`` with a ProtectedXMLParser and return the parsed result."""
    dtd_safe_parser = ProtectedXMLParser()
    dtd_safe_parser.feed(text)
    root = dtd_safe_parser.close()
    return root
def _from_xml(self, datastring): def _from_xml(self, datastring):
if datastring is None: if datastring is None:
return None return None
plurals = set(self.metadata.get('plurals', {})) plurals = set(self.metadata.get('plurals', {}))
try: try:
node = etree.fromstring(datastring) node = self._parseXML(datastring)
root_tag = self._get_key(node.tag) root_tag = self._get_key(node.tag)
# Deserialize link node was needed by unit test for verifying # Deserialize link node was needed by unit test for verifying
# the request's response # the request's response