[rfc] Simplified XML parsing

Daniel Jacobowitz drow@false.org
Thu Dec 7 16:02:00 GMT 2006


CodeSourcery's GDB release currently has three XML parsers in it.  One
of those is in HEAD already; another I'm about to post, and the third
hopefully not long after.  The amount of duplication and unnecessary
complexity was disappointing, but I couldn't see a better way to do
it.  Fortunately, Nathan Sidwell could :-)  This version is entirely
a ripoff of the XML parser he wrote for another CodeSourcery project,
adapted to fit into GDB (and use its cleanup / exception mechanisms).

This patch rewrites the memory-map parser, dividing it into an
infrastructure and a set of tables and action functions.  The bulk of the
parser now looks like this:

+const struct gdb_xml_element memory_children[] = {
+  { "property", property_attributes, NULL,
+    GDB_XML_EF_REPEATABLE | GDB_XML_EF_OPTIONAL,
+    memory_map_start_property, memory_map_end_property },
+  { NULL, NULL, NULL, GDB_XML_EF_NONE, NULL, NULL }
+};
+
+const struct gdb_xml_enum memory_type_enum[] = {
+  { "ram", MEM_RW },
+  { "rom", MEM_RO },
+  { "flash", MEM_FLASH },
+  { NULL, 0 }
+};
+
+const struct gdb_xml_attribute memory_attributes[] = {
+  { "start", GDB_XML_AF_NONE, gdb_xml_parse_attr_ulongest, NULL },
+  { "length", GDB_XML_AF_NONE, gdb_xml_parse_attr_ulongest, NULL },
+  { "type", GDB_XML_AF_NONE, gdb_xml_parse_attr_enum, &memory_type_enum },
+  { NULL, GDB_XML_AF_NONE, NULL, NULL }
+};

There are functions to call at the start and end of particular elements,
and for attributes, and everything else is handled by code which is generic
to parsing any sort of XML.  It's quite simplified relative to expat - for
instance, there's no support for an element which has both elements
and text as its children, like "other <b>text</b>".  That could be easily
added, but so far I haven't needed it for anything; I kept this as simple
as I could.  Converting the complete feature description parser in my
working tree to this scheme cut its size roughly in half (about a
thousand lines).

There is also a new "set debug xml" command, which works consistently across
all parsers and somewhat better than my earlier handwritten debugging output.

Any thoughts on this patch?  Nathan, could you take a quick look at the
header and see if I stripped out something I ought to have left in?

-- 
Daniel Jacobowitz
CodeSourcery

2006-12-06  Daniel Jacobowitz  <dan@codesourcery.com>

	* memory-map.c (struct memory_map_parsing_data): Remove most
	members.  Make property_name an array.
	(free_memory_map_parsing_data, memory_map_start_element)
	(memory_map_end_element, memory_map_character_data): Delete.
	(memory_map_start_memory, memory_map_end_memory)
	(memory_map_start_property, memory_map_end_property): New functions.
	(property_attributes, memory_children, memory_type_enum)
	(memory_attributes, memory_map_children, memory_map_elements): New.
	(parse_memory_map): Rewrite.
	* xml-support.c (debug_xml): New.
	(xml_get_required_attribute, xml_get_integer_attribute)
	(xml_get_enum_value, free_xml_parser, make_cleanup_free_xml_parser):
	Delete.
	(struct scope_level, struct gdb_xml_parser, gdb_xml_body_text)
	(gdb_xml_debug, gdb_xml_error, gdb_xml_values_cleanup)
	(gdb_xml_start_element, gdb_xml_start_element_wrapper)
	(gdb_xml_end_element, gdb_xml_end_element_wrapper, gdb_xml_cleanup)
	(gdb_xml_create_parser_and_cleanup, gdb_xml_parse)
	(gdb_xml_parse_ulongest, gdb_xml_parse_attr_ulongest)
	(gdb_xml_parse_attr_enum, show_debug_xml, _initialize_xml_support):
	New.
	* xml-support.h (struct gdb_xml_value, gdb_xml_attribute_handler)
	(enum gdb_xml_attribute_flag, struct gdb_xml_attribute)
	(enum gdb_xml_element_flag, struct gdb_xml_element)
	(gdb_xml_element_start_handler, gdb_xml_element_end_handler)
	(struct gdb_xml_enum): New.
	(gdb_xml_create_parser_and_cleanup, gdb_xml_parse, gdb_xml_debug)
	(gdb_xml_error, gdb_xml_parse_attr_ulongest)
	(gdb_xml_parse_attr_enum, gdb_xml_parse_ulongest): New prototypes.
	(xml_get_required_attribute, xml_get_integer_attribute)
	(xml_get_enum_value, make_cleanup_free_xml_parser): Delete prototypes.
	* Makefile.in (xml_support_h, xml-support.o): Update.

	* gdb.texinfo (Debugging Output): Document "set debug xml"
	and "show debug xml".

---
 gdb/Makefile.in     |    4 
 gdb/doc/gdb.texinfo |    5 
 gdb/memory-map.c    |  266 +++++++++----------------
 gdb/xml-support.c   |  550 ++++++++++++++++++++++++++++++++++++++++++++++------
 gdb/xml-support.h   |  168 ++++++++++++++-
 5 files changed, 753 insertions(+), 240 deletions(-)

Index: src/gdb/memory-map.c
===================================================================
--- src.orig/gdb/memory-map.c	2006-12-06 16:03:34.000000000 -0500
+++ src/gdb/memory-map.c	2006-12-06 16:04:11.000000000 -0500
@@ -50,165 +50,87 @@ parse_memory_map (const char *memory_map
 
 #include "gdb_expat.h"
 
-/* Internal parsing data passed to all Expat callbacks.  */
+/* Internal parsing data passed to all XML callbacks.  */
 struct memory_map_parsing_data
   {
     VEC(mem_region_s) **memory_map;
-    struct mem_region *currently_parsing;
-    char *character_data;
-    const char *property_name;
-    int capture_text;
+    char property_name[32];
   };
 
-static void
-free_memory_map_parsing_data (void *p_)
-{
-  struct memory_map_parsing_data *p = p_;
+/* Handle the start of a <memory> element.  */
 
-  xfree (p->character_data);
+static void
+memory_map_start_memory (struct gdb_xml_parser *parser,
+			 const struct gdb_xml_element *element,
+			 void *user_data, VEC(gdb_xml_value_s) *attributes)
+{
+  struct memory_map_parsing_data *data = user_data;
+  /* Append a new, not yet initialized region to the caller's map.  */
+  struct mem_region *r = VEC_safe_push (mem_region_s, *data->memory_map, NULL);
+  ULONGEST *start_p, *length_p, *type_p;
+
+  /* ATTRIBUTES is indexed positionally, in the order the attributes
+     are listed in memory_attributes: "start", "length", "type".  All
+     three are required there, so all three slots are present.
+     NOTE(review): the "type" value is allocated by
+     gdb_xml_parse_attr_enum with the size of struct gdb_xml_enum's
+     value member; reading it through a ULONGEST * assumes that member
+     is a ULONGEST -- confirm against xml-support.h.  */
+  start_p = VEC_index (gdb_xml_value_s, attributes, 0)->value;
+  length_p = VEC_index (gdb_xml_value_s, attributes, 1)->value;
+  type_p = VEC_index (gdb_xml_value_s, attributes, 2)->value;
+
+  mem_region_init (r);
+  r->lo = *start_p;
+  r->hi = r->lo + *length_p;
+  r->attrib.mode = *type_p;
+  /* -1 marks the block size as unset; memory_map_end_memory rejects a
+     flash region that still has this value when <memory> closes.  */
+  r->attrib.blocksize = -1;
 }
 
-/* Callback called by Expat on start of element.
-   DATA_ is pointer to memory_map_parsing_data
-   NAME is the name of element
-   ATTRS is the zero-terminated array of attribute names and
-   attribute values.
-
-   This function handles the following elements:
-   - 'memory' -- creates a new memory region and initializes it
-     from attributes.  Sets DATA_.currently_parsing to the new region.
-   - 'properties' -- sets DATA.capture_text.  */
+/* Handle the end of a <memory> element.  Verify that any necessary
+   children were present.  */
 
 static void
-memory_map_start_element (void *data_, const XML_Char *name,
-			  const XML_Char **attrs)
+memory_map_end_memory (struct gdb_xml_parser *parser,
+		       const struct gdb_xml_element *element,
+		       void *user_data, const char *body_text)
 {
-  static const XML_Char *type_names[] = {"ram", "rom", "flash", 0};
-  static int type_values[] = { MEM_RW, MEM_RO, MEM_FLASH };
-  struct memory_map_parsing_data *data = data_;
-  struct gdb_exception ex;
-
-  TRY_CATCH (ex, RETURN_MASK_ERROR)
-    {
-      if (strcmp (name, "memory") == 0)
-	{
-	  struct mem_region *r;
-
-	  r = VEC_safe_push (mem_region_s, *data->memory_map, NULL);
-	  mem_region_init (r);
-
-	  r->lo = xml_get_integer_attribute (attrs, "start");
-	  r->hi = r->lo + xml_get_integer_attribute (attrs, "length");
-	  r->attrib.mode = xml_get_enum_value (attrs, "type", type_names,
-					       type_values);
-	  r->attrib.blocksize = -1;
-
-	  data->currently_parsing = r;
-	}
-      else if (strcmp (name, "property") == 0)
-	{
-	  if (!data->currently_parsing)
-	    throw_error (XML_PARSE_ERROR,
-		_("memory map: found 'property' element outside 'memory'"));
-
-	  data->capture_text = 1;
+  struct memory_map_parsing_data *data = user_data;
+  struct mem_region *r = VEC_last (mem_region_s, *data->memory_map);
 
-	  data->property_name = xml_get_required_attribute (attrs, "name");
-	}
-    }
-  if (ex.reason < 0)
-    throw_error
-      (ex.error, _("While parsing element %s:\n%s"), name, ex.message);
+  if (r->attrib.mode == MEM_FLASH && r->attrib.blocksize == -1)
+    gdb_xml_error (parser, _("Flash block size is not set"));
 }
 
-/* Callback called by Expat on start of element.  DATA_ is a pointer
-   to our memory_map_parsing_data.  NAME is the name of the element.
-
-   This function handles the following elements:
-   - 'property' -- check that the property name is 'blocksize' and
-     sets DATA->currently_parsing->attrib.blocksize
-   - 'memory' verifies that flash block size is set.  */
+/* Handle the start of a <property> element by saving the name
+   attribute for later.  */
 
 static void
-memory_map_end_element (void *data_, const XML_Char *name)
+memory_map_start_property (struct gdb_xml_parser *parser,
+			   const struct gdb_xml_element *element,
+			   void *user_data, VEC(gdb_xml_value_s) *attributes)
 {
-  struct memory_map_parsing_data *data = data_;
-  struct gdb_exception ex;
+  struct memory_map_parsing_data *data = user_data;
+  char *name;
 
-  TRY_CATCH (ex, RETURN_MASK_ERROR)
-    {
-      if (strcmp (name, "property") == 0)
-	{
-	  if (strcmp (data->property_name, "blocksize") == 0)
-	    {
-	      if (!data->character_data)
-		throw_error (XML_PARSE_ERROR,
-			     _("Empty content of 'property' element"));
-	      char *end = NULL;
-	      data->currently_parsing->attrib.blocksize
-		= strtoul (data->character_data, &end, 0);
-	      if (*end != '\0')
-		throw_error (XML_PARSE_ERROR,
-			     _("Invalid content of the 'blocksize' property"));
-	    }
-	  else
-	    throw_error (XML_PARSE_ERROR,
-			 _("Unknown memory region property: %s"), name);
-
-	  data->capture_text = 0;
-	}
-      else if (strcmp (name, "memory") == 0)
-	{
-	  if (data->currently_parsing->attrib.mode == MEM_FLASH
-	      && data->currently_parsing->attrib.blocksize == -1)
-	    throw_error (XML_PARSE_ERROR,
-			 _("Flash block size is not set"));
-
-	  data->currently_parsing = 0;
-	  data->character_data = 0;
-	}
-    }
-  if (ex.reason < 0)
-    throw_error
-      (ex.error, _("while parsing element %s: \n%s"), name, ex.message);
+  name = VEC_index (gdb_xml_value_s, attributes, 0)->value;
+  snprintf (data->property_name, sizeof (data->property_name), "%s", name);
 }
 
-/* Callback called by expat for all character data blocks.
-   DATA_ is the pointer to memory_map_parsing_data.
-   S is the point to character data.
-   LEN is the length of data; the data is not zero-terminated.
+/* Handle the end of a <property> element and its value.  */
 
-   If DATA_->CAPTURE_TEXT is 1, appends this block of characters
-   to DATA_->CHARACTER_DATA.  */
 static void
-memory_map_character_data (void *data_, const XML_Char *s,
-			   int len)
+memory_map_end_property (struct gdb_xml_parser *parser,
+			 const struct gdb_xml_element *element,
+			 void *user_data, const char *body_text)
 {
-  struct memory_map_parsing_data *data = data_;
-  int current_size = 0;
+  struct memory_map_parsing_data *data = user_data;
+  char *name = data->property_name;
 
-  if (!data->capture_text)
-    return;
-
-  /* Expat interface does not guarantee that a single call to
-     a handler will be made. Actually, one call for each line
-     will be made, and character data can possibly span several
-     lines.
-
-     Take care to realloc the data if needed.  */
-  if (!data->character_data)
-    data->character_data = xmalloc (len + 1);
-  else
+  if (strcmp (name, "blocksize") == 0)
     {
-      current_size = strlen (data->character_data);
-      data->character_data = xrealloc (data->character_data,
-				       current_size + len + 1);
-    }
+      struct mem_region *r = VEC_last (mem_region_s, *data->memory_map);
 
-  memcpy (data->character_data + current_size, s, len);
-  data->character_data[current_size + len] = '\0';
+      r->attrib.blocksize = gdb_xml_parse_ulongest (parser, body_text);
+    }
+  else
+    gdb_xml_debug (parser, _("Unknown property \"%s\""), name);
 }
 
+/* Discard the constructed memory map (if an error occurs).  */
+
 static void
 clear_result (void *p)
 {
@@ -217,56 +139,66 @@ clear_result (void *p)
   *result = NULL;
 }
 
+/* The allowed elements and attributes for an XML memory map.
+
+   Element entries are initialized as { name, attributes, children,
+   flags, start handler, end handler } and attribute entries as
+   { name, flags, handler, handler data }.  Every table ends with an
+   entry whose name is NULL.  */
+
+/* Attributes of <property>.  "name" has no handler, so the parser
+   passes it through as a copy of the attribute string.  */
+const struct gdb_xml_attribute property_attributes[] = {
+  { "name", GDB_XML_AF_NONE, NULL, NULL },
+  { NULL, GDB_XML_AF_NONE, NULL, NULL }
+};
+
+/* Children of <memory>: any number of <property> elements, including
+   none.  */
+const struct gdb_xml_element memory_children[] = {
+  { "property", property_attributes, NULL,
+    GDB_XML_EF_REPEATABLE | GDB_XML_EF_OPTIONAL,
+    memory_map_start_property, memory_map_end_property },
+  { NULL, NULL, NULL, GDB_XML_EF_NONE, NULL, NULL }
+};
+
+/* Accepted values of the "type" attribute of <memory> and the memory
+   access mode each one selects.  */
+const struct gdb_xml_enum memory_type_enum[] = {
+  { "ram", MEM_RW },
+  { "rom", MEM_RO },
+  { "flash", MEM_FLASH },
+  { NULL, 0 }
+};
+
+/* Attributes of <memory>, all required.  The order matters:
+   memory_map_start_memory fetches the parsed values by index
+   (0 = start, 1 = length, 2 = type).  */
+const struct gdb_xml_attribute memory_attributes[] = {
+  { "start", GDB_XML_AF_NONE, gdb_xml_parse_attr_ulongest, NULL },
+  { "length", GDB_XML_AF_NONE, gdb_xml_parse_attr_ulongest, NULL },
+  { "type", GDB_XML_AF_NONE, gdb_xml_parse_attr_enum, &memory_type_enum },
+  { NULL, GDB_XML_AF_NONE, NULL, NULL }
+};
+
+/* Children of <memory-map>: <memory>, repeatable but not optional, so
+   at least one must be present.  */
+const struct gdb_xml_element memory_map_children[] = {
+  { "memory", memory_attributes, memory_children, GDB_XML_EF_REPEATABLE,
+    memory_map_start_memory, memory_map_end_memory },
+  { NULL, NULL, NULL, GDB_XML_EF_NONE, NULL, NULL }
+};
+
+/* Top level of the document: a single <memory-map> element with no
+   attributes and no handlers of its own.  */
+const struct gdb_xml_element memory_map_elements[] = {
+  { "memory-map", NULL, memory_map_children, GDB_XML_EF_NONE,
+    NULL, NULL },
+  { NULL, NULL, NULL, GDB_XML_EF_NONE, NULL, NULL }
+};
+
 VEC(mem_region_s) *
 parse_memory_map (const char *memory_map)
 {
+  struct gdb_xml_parser *parser;
   VEC(mem_region_s) *result = NULL;
-  struct cleanup *back_to = make_cleanup (null_cleanup, NULL);
-  struct cleanup *before_deleting_result;
-  struct cleanup *saved;
-  volatile struct gdb_exception ex;
-  int ok = 0;
-
+  struct cleanup *before_deleting_result, *back_to;
   struct memory_map_parsing_data data = {};
 
-  XML_Parser parser = XML_ParserCreateNS (NULL, '!');
-  if (parser == NULL)
-    goto out;
+  back_to = make_cleanup (null_cleanup, NULL);
+  parser = gdb_xml_create_parser_and_cleanup (_("target memory map"),
+					      memory_map_elements, &data);
 
-  make_cleanup_free_xml_parser (parser);
-  make_cleanup (free_memory_map_parsing_data, &data);
   /* Note: 'clear_result' will zero 'result'.  */
   before_deleting_result = make_cleanup (clear_result, &result);
-
-  XML_SetElementHandler (parser, memory_map_start_element,
-			 memory_map_end_element);
-  XML_SetCharacterDataHandler (parser, memory_map_character_data);
-  XML_SetUserData (parser, &data);
   data.memory_map = &result;
 
-  TRY_CATCH (ex, RETURN_MASK_ERROR)
-    {
-      if (XML_Parse (parser, memory_map, strlen (memory_map), 1)
-	  != XML_STATUS_OK)
-	{
-	  enum XML_Error err = XML_GetErrorCode (parser);
-
-	  throw_error (XML_PARSE_ERROR, "%s", XML_ErrorString (err));
-	}
-    }
-  if (ex.reason != GDB_NO_ERROR)
-    {
-      if (ex.error == XML_PARSE_ERROR)
-	/* Just report it.  */
-	warning (_("Could not parse XML memory map: %s"), ex.message);
-      else
-	throw_exception (ex);
-    }
-  else
+  if (gdb_xml_parse (parser, memory_map) == 0)
     /* Parsed successfully, don't need to delete the result.  */
     discard_cleanups (before_deleting_result);
 
- out:
   do_cleanups (back_to);
   return result;
 }
Index: src/gdb/xml-support.c
===================================================================
--- src.orig/gdb/xml-support.c	2006-12-06 16:03:34.000000000 -0500
+++ src/gdb/xml-support.c	2006-12-07 10:21:27.000000000 -0500
@@ -21,6 +21,10 @@
    Boston, MA 02110-1301, USA.  */
 
 #include "defs.h"
+#include "gdbcmd.h"
+
+/* Debugging flag.  */
+static int debug_xml;
 
 /* The contents of this file are only useful if XML support is
    available.  */
@@ -31,24 +35,442 @@
 
 #include "gdb_expat.h"
 #include "gdb_string.h"
+#include "safe-ctype.h"
+
+/* A parsing level -- used to keep track of the current element
+   nesting.  */
+struct scope_level
+{
+  /* Elements we allow at this level.  */
+  const struct gdb_xml_element *elements;
 
-/* Returns the value of attribute ATTR from expat attribute list
-   ATTRLIST.  If not found, throws an exception.  */
+  /* The element which we are within.  */
+  const struct gdb_xml_element *element;
 
-const XML_Char *
-xml_get_required_attribute (const XML_Char **attrs,
-			    const XML_Char *attr)
+  /* Mask of which elements we've seen at this level (used for
+     optional and repeatable checking).  */
+  unsigned int seen;
+
+  /* Body text accumulation.  */
+  struct obstack *body;
+};
+typedef struct scope_level scope_level_s;
+DEF_VEC_O(scope_level_s);
+
+/* The parser itself, and our additional state.  */
+struct gdb_xml_parser
 {
-  const XML_Char **p;
-  for (p = attrs; *p; p += 2)
+  XML_Parser expat_parser;	/* The underlying expat parser.  */
+
+  const char *name;		/* Name of this parser.  */
+  void *user_data;		/* The user's callback data, for handlers.  */
+
+  VEC(scope_level_s) *scopes;	/* Scoping stack.  */
+
+  struct gdb_exception error;	/* A thrown error, if any.  */
+  int last_line;		/* The line of the thrown error, or 0.  */
+};
+
+/* Process some body text.  We accumulate the text for later use; it's
+   wrong to do anything with it immediately, because a single block of
+   text might be broken up into multiple calls to this function.  */
+
+static void
+gdb_xml_body_text (void *data, const XML_Char *text, int length)
+{
+  struct gdb_xml_parser *parser = data;
+  struct scope_level *scope = VEC_last (scope_level_s, parser->scopes);
+
+  if (scope->body == NULL)
     {
-      const char *name = p[0];
-      const char *val = p[1];
+      scope->body = XZALLOC (struct obstack);
+      obstack_init (scope->body);
+    }
+
+  obstack_grow (scope->body, text, length);
+}
+
+/* Issue a debugging message from one of PARSER's handlers.
+
+   FORMAT and the arguments after it are printf-style.  Nothing is
+   printed unless debug_xml is set.  The message is written to
+   gdb_stderr, prefixed with PARSER's name and, when expat reports a
+   non-zero one, the current line number.  */
+
+void
+gdb_xml_debug (struct gdb_xml_parser *parser, const char *format, ...)
+{
+  int line = XML_GetCurrentLineNumber (parser->expat_parser);
+  va_list ap;
+  char *message;
+
+  if (!debug_xml)
+    return;
+
+  va_start (ap, format);
+  message = xstrvprintf (format, ap);
+  /* Every va_start must be matched by a va_end before the function
+     returns.  */
+  va_end (ap);
+
+  if (line)
+    fprintf_unfiltered (gdb_stderr, "%s (line %d): %s\n",
+			parser->name, line, message);
+  else
+    fprintf_unfiltered (gdb_stderr, "%s: %s\n",
+			parser->name, message);
+  xfree (message);
+}
+
+/* Issue an error message from one of PARSER's handlers, and stop
+   parsing.
+
+   FORMAT and the arguments after it are printf-style.  The current
+   expat line number is saved in PARSER->last_line, where
+   gdb_xml_parse picks it up for its warning, and then an
+   XML_PARSE_ERROR is thrown with the formatted message.  */
+
+void
+gdb_xml_error (struct gdb_xml_parser *parser, const char *format, ...)
+{
+  int line = XML_GetCurrentLineNumber (parser->expat_parser);
+  va_list ap;
+
+  parser->last_line = line;
+  va_start (ap, format);
+  /* NOTE(review): there is no va_end here; presumably throw_verror
+     never returns, so there is nowhere to put one -- confirm.  */
+  throw_verror (XML_PARSE_ERROR, format, ap);
+}
 
-      if (strcmp (name, attr) == 0)
-	return val;
+/* Cleanup callback that releases a vector of parsed attribute
+   values.  DATA points at the VEC(gdb_xml_value_s) * to destroy: the
+   value of every entry is xfree'd, then the vector itself is freed.  */
+
+static void
+gdb_xml_values_cleanup (void *data)
+{
+  VEC(gdb_xml_value_s) **vec_p = data;
+  struct gdb_xml_value *entry;
+  int i = 0;
+
+  while (VEC_iterate (gdb_xml_value_s, *vec_p, i, entry))
+    {
+      xfree (entry->value);
+      i++;
+    }
+
+  VEC_free (gdb_xml_value_s, *vec_p);
+}
+
+/* Handle the start of an element.  DATA is our local XML parser, NAME
+   is the element, and ATTRS are the names and values of this
+   element's attributes (a NULL-terminated list of name/value pairs).
+
+   A new scope level is pushed for the element.  If NAME is not one of
+   the children allowed in the enclosing scope, the new level is left
+   zeroed, so the element and everything below it is ignored.
+   Otherwise the attributes listed for the element are parsed in table
+   order, the element's start handler (if any) is called with the
+   parsed values, and the new level is filled in.  The parsed values
+   are freed before returning.
+
+   Errors (a non-repeatable element seen twice, a missing required
+   attribute, or anything reported by an attribute handler) are raised
+   through gdb_xml_error, which throws.  */
+
+static void
+gdb_xml_start_element (void *data, const XML_Char *name,
+		       const XML_Char **attrs)
+{
+  struct gdb_xml_parser *parser = data;
+  struct scope_level *scope;
+  struct scope_level new_scope;
+  const struct gdb_xml_element *element;
+  const struct gdb_xml_attribute *attribute;
+  VEC(gdb_xml_value_s) *attributes = NULL;
+  unsigned int seen;
+  struct cleanup *back_to;
+
+  back_to = make_cleanup (gdb_xml_values_cleanup, &attributes);
+
+  /* Push an error scope.  If we return or throw an exception before
+     filling this in, it will tell us to ignore children of this
+     element.
+
+     Pushing may reallocate the scope vector, which would invalidate a
+     pointer to the enclosing scope taken beforehand.  Reserve the
+     slot first, then look up the enclosing scope, so that SCOPE stays
+     valid across the push.  */
+  VEC_reserve (scope_level_s, parser->scopes, 1);
+  scope = VEC_last (scope_level_s, parser->scopes);
+  memset (&new_scope, 0, sizeof (new_scope));
+  VEC_safe_push (scope_level_s, parser->scopes, &new_scope);
+
+  gdb_xml_debug (parser, _("Entering element <%s>"), name);
+
+  /* Find this element in the list of the current scope's allowed
+     children.  Record that we've seen it.  SEEN is the bit that
+     corresponds to the element's position in that list.  */
+
+  seen = 1;
+  for (element = scope->elements; element && element->name;
+       element++, seen <<= 1)
+    if (strcmp (element->name, name) == 0)
+      break;
+
+  if (element == NULL || element->name == NULL)
+    {
+      gdb_xml_debug (parser, _("Element <%s> unknown"), name);
+      do_cleanups (back_to);
+      return;
     }
-  throw_error (XML_PARSE_ERROR, _("Can't find attribute %s"), attr);
+
+  if (!(element->flags & GDB_XML_EF_REPEATABLE) && (seen & scope->seen))
+    gdb_xml_error (parser, _("Element <%s> only expected once"), name);
+
+  scope->seen |= seen;
+
+  /* Parse the expected attributes in table order, so that start
+     handlers can fetch the values by index.  */
+  for (attribute = element->attributes;
+       attribute != NULL && attribute->name != NULL;
+       attribute++)
+    {
+      const char *val = NULL;
+      const XML_Char **p;
+      void *parsed_value;
+      struct gdb_xml_value new_value;
+
+      for (p = attrs; *p != NULL; p += 2)
+	if (!strcmp (attribute->name, p[0]))
+	  {
+	    val = p[1];
+	    break;
+	  }
+
+      if (*p != NULL && val == NULL)
+	{
+	  gdb_xml_debug (parser, _("Attribute \"%s\" missing a value"),
+			 attribute->name);
+	  continue;
+	}
+
+      if (*p == NULL && !(attribute->flags & GDB_XML_AF_OPTIONAL))
+	{
+	  gdb_xml_error (parser, _("Required attribute \"%s\" of "
+				   "<%s> not specified"),
+			 attribute->name, element->name);
+	  continue;
+	}
+
+      if (*p == NULL)
+	continue;
+
+      gdb_xml_debug (parser, _("Parsing attribute %s=\"%s\""),
+		     attribute->name, val);
+
+      /* Without a handler the value is passed through as a copy of
+	 the attribute string.  */
+      if (attribute->handler)
+	parsed_value = attribute->handler (parser, attribute, val);
+      else
+	parsed_value = xstrdup (val);
+
+      new_value.name = attribute->name;
+      new_value.value = parsed_value;
+      VEC_safe_push (gdb_xml_value_s, attributes, &new_value);
+    }
+
+  /* Check for unrecognized attributes.  */
+  if (debug_xml)
+    {
+      const XML_Char **p;
+
+      for (p = attrs; *p != NULL; p += 2)
+	{
+	  for (attribute = element->attributes;
+	       attribute != NULL && attribute->name != NULL;
+	       attribute++)
+	    if (strcmp (attribute->name, *p) == 0)
+	      break;
+
+	  if (attribute == NULL || attribute->name == NULL)
+	    gdb_xml_debug (parser, _("Ignoring unknown attribute %s"), *p);
+	}
+    }
+
+  /* Call the element handler if there is one.  */
+  if (element->start_handler)
+    element->start_handler (parser, element, parser->user_data, attributes);
+
+  /* Fill in a new scope level.  */
+  scope = VEC_last (scope_level_s, parser->scopes);
+  scope->element = element;
+  scope->elements = element->children;
+
+  do_cleanups (back_to);
+}
+
+/* Wrapper for gdb_xml_start_element, to prevent throwing exceptions
+   through expat.  This is the start-element callback actually
+   registered with expat.  Any exception thrown by
+   gdb_xml_start_element is caught, saved in the parser's error field
+   and followed by a request that expat stop parsing.  */
+
+static void
+gdb_xml_start_element_wrapper (void *data, const XML_Char *name,
+			       const XML_Char **attrs)
+{
+  struct gdb_xml_parser *parser = data;
+  volatile struct gdb_exception ex;
+
+  /* Once an error has been recorded, ignore all further callbacks.  */
+  if (parser->error.reason < 0)
+    return;
+
+  TRY_CATCH (ex, RETURN_MASK_ALL)
+    {
+      gdb_xml_start_element (data, name, attrs);
+    }
+  if (ex.reason < 0)
+    {
+      /* Keep the exception for gdb_xml_parse to report or rethrow.  */
+      parser->error = ex;
+      XML_StopParser (parser->expat_parser, XML_FALSE);
+    }
+}
+
+/* Handle the end of an element.  DATA is our local XML parser, and
+   NAME is the current element.
+
+   Checks that every non-optional child listed for this element was
+   seen, passes the accumulated body text (with leading and trailing
+   whitespace stripped, or "" if there was none) to the element's end
+   handler if it has one, and pops the element's scope level.  Errors
+   are raised through gdb_xml_error, which throws.  */
+
+static void
+gdb_xml_end_element (void *data, const XML_Char *name)
+{
+  struct gdb_xml_parser *parser = data;
+  struct scope_level *scope = VEC_last (scope_level_s, parser->scopes);
+  const struct gdb_xml_element *element;
+  unsigned int seen;
+  char *body;
+
+  gdb_xml_debug (parser, _("Leaving element <%s>"), name);
+
+  /* SEEN walks the same bit positions that gdb_xml_start_element set
+     in scope->seen for each child it recognized.  '<' and '>' need no
+     escaping in a C string literal; "\<" is not a valid escape.  */
+  for (element = scope->elements, seen = 1;
+       element != NULL && element->name != NULL;
+       element++, seen <<= 1)
+    if ((scope->seen & seen) == 0
+	&& (element->flags & GDB_XML_EF_OPTIONAL) == 0)
+      gdb_xml_error (parser, _("Required element <%s> is missing"),
+		     element->name);
+
+  /* Call the element processor. */
+  if (scope->body == NULL)
+    body = "";
+  else
+    {
+      int length;
+
+      /* Take the length before appending the terminating NUL.  */
+      length = obstack_object_size (scope->body);
+      obstack_1grow (scope->body, '\0');
+      body = obstack_finish (scope->body);
+
+      /* Strip leading and trailing whitespace.  */
+      while (length > 0 && ISSPACE (body[length-1]))
+	body[--length] = '\0';
+      while (*body && ISSPACE (*body))
+	body++;
+    }
+
+  /* scope->element is NULL for an element that was not recognized.  */
+  if (scope->element != NULL && scope->element->end_handler)
+    scope->element->end_handler (parser, scope->element, parser->user_data,
+				 body);
+
+  /* Pop the scope level.  */
+  if (scope->body)
+    {
+      obstack_free (scope->body, NULL);
+      xfree (scope->body);
+    }
+  VEC_pop (scope_level_s, parser->scopes);
+}
+
+/* Wrapper for gdb_xml_end_element, to prevent throwing exceptions
+   through expat.  This is the end-element callback actually
+   registered with expat.  Any exception thrown by gdb_xml_end_element
+   is caught, saved in the parser's error field and followed by a
+   request that expat stop parsing.  */
+
+static void
+gdb_xml_end_element_wrapper (void *data, const XML_Char *name)
+{
+  struct gdb_xml_parser *parser = data;
+  volatile struct gdb_exception ex;
+
+  /* Once an error has been recorded, ignore all further callbacks.  */
+  if (parser->error.reason < 0)
+    return;
+
+  TRY_CATCH (ex, RETURN_MASK_ALL)
+    {
+      gdb_xml_end_element (data, name);
+    }
+  if (ex.reason < 0)
+    {
+      /* Keep the exception for gdb_xml_parse to report or rethrow.  */
+      parser->error = ex;
+      XML_StopParser (parser->expat_parser, XML_FALSE);
+    }
+}
+
+/* Cleanup callback that destroys a parser.  ARG is the struct
+   gdb_xml_parser to release: the expat parser is freed, then any body
+   text still attached to a scope level, then the scope stack, and
+   finally the parser structure itself.  */
+
+static void
+gdb_xml_cleanup (void *arg)
+{
+  struct gdb_xml_parser *parser = arg;
+  struct scope_level *level;
+  int i = 0;
+
+  XML_ParserFree (parser->expat_parser);
+
+  /* Clean up the scopes.  */
+  while (VEC_iterate (scope_level_s, parser->scopes, i, level))
+    {
+      i++;
+      if (!level->body)
+	continue;
+
+      obstack_free (level->body, NULL);
+      xfree (level->body);
+    }
+  VEC_free (scope_level_s, parser->scopes);
+
+  xfree (parser);
+}
+
+/* Initialize and return a parser.  Register a cleanup to destroy the
+   parser.
+
+   NAME identifies the parser in debugging and warning messages; the
+   pointer is stored, not copied.  ELEMENTS lists the elements allowed
+   at the top level of the document.  USER_DATA is passed unchanged to
+   every element start and end handler.  */
+
+struct gdb_xml_parser *
+gdb_xml_create_parser_and_cleanup (const char *name,
+				   const struct gdb_xml_element *elements,
+				   void *user_data)
+{
+  struct gdb_xml_parser *parser;
+  struct scope_level start_scope;
+
+  /* Initialize the parser.  */
+  parser = XZALLOC (struct gdb_xml_parser);
+  parser->expat_parser = XML_ParserCreateNS (NULL, '!');
+  if (parser->expat_parser == NULL)
+    {
+      /* NOTE(review): presumably nomem does not return -- confirm.  */
+      xfree (parser);
+      nomem (0);
+    }
+
+  parser->name = name;
+
+  /* Expat's own user data is our parser structure; the caller's
+     USER_DATA is kept inside it for the element handlers.  */
+  parser->user_data = user_data;
+  XML_SetUserData (parser->expat_parser, parser);
+
+  /* Set the callbacks.  */
+  XML_SetElementHandler (parser->expat_parser, gdb_xml_start_element_wrapper,
+			 gdb_xml_end_element_wrapper);
+  XML_SetCharacterDataHandler (parser->expat_parser, gdb_xml_body_text);
+
+  /* Initialize the outer scope.  */
+  memset (&start_scope, 0, sizeof (start_scope));
+  start_scope.elements = elements;
+  VEC_safe_push (scope_level_s, parser->scopes, &start_scope);
+
+  /* The cleanup is only registered here, once the parser is fully
+     set up.  */
+  make_cleanup (gdb_xml_cleanup, parser);
+
+  return parser;
+}
+
+/* Invoke PARSER on BUFFER.  BUFFER is the data to parse, which
+   should be NUL-terminated.
+
+   The return value is 0 for success or -1 for error.  It may throw,
+   but only if something unexpected goes wrong during parsing; parse
+   errors will be caught, warned about, and reported as failure.  */
+
+int
+gdb_xml_parse (struct gdb_xml_parser *parser, const char *buffer)
+{
+  enum XML_Status status;
+  const char *error_string;
+
+  /* The whole document is handed to expat as a single, final chunk.  */
+  status = XML_Parse (parser->expat_parser, buffer, strlen (buffer), 1);
+
+  if (status == XML_STATUS_OK && parser->error.reason == 0)
+    return 0;
+
+  /* An XML_PARSE_ERROR saved by one of the element wrappers is
+     checked first, then an error reported by expat itself.  Any other
+     saved exception is rethrown to the caller.  */
+  if (parser->error.reason == RETURN_ERROR
+      && parser->error.error == XML_PARSE_ERROR)
+    {
+      gdb_assert (parser->error.message != NULL);
+      error_string = parser->error.message;
+    }
+  else if (status == XML_STATUS_ERROR)
+    {
+      enum XML_Error err = XML_GetErrorCode (parser->expat_parser);
+      error_string = XML_ErrorString (err);
+    }
+  else
+    {
+      gdb_assert (parser->error.reason < 0);
+      throw_exception (parser->error);
+    }
+
+  /* last_line is only set by gdb_xml_error, so errors detected by
+     expat itself are reported without a line number.  */
+  if (parser->last_line != 0)
+    warning (_("while parsing %s (at line %d): %s"), parser->name,
+	     parser->last_line, error_string);
+  else
+    warning (_("while parsing %s: %s"), parser->name, error_string);
+
+  return -1;
 }
 
 /* Parse a field VALSTR that we expect to contain an integer value.
@@ -74,72 +496,84 @@ xml_parse_unsigned_integer (const char *
   return 0;
 }
 
-/* Gets the value of an integer attribute named ATTR, if it's present.
-   If the attribute is not found, or can't be parsed as integer,
-   throws an exception.  */
+/* Parse an integer string into a ULONGEST and return it, or call
+   gdb_xml_error if it could not be parsed.  */
 
 ULONGEST
-xml_get_integer_attribute (const XML_Char **attrs,
-			   const XML_Char *attr)
+gdb_xml_parse_ulongest (struct gdb_xml_parser *parser, const char *value)
 {
   ULONGEST result;
-  const XML_Char *value = xml_get_required_attribute (attrs, attr);
 
   if (xml_parse_unsigned_integer (value, &result) != 0)
-    {
-      throw_error (XML_PARSE_ERROR,
-		   _("Can't convert value of attribute %s, %s, to integer"),
-		   attr, value);
-    }
+    gdb_xml_error (parser, _("Can't convert \"%s\" to an integer"), value);
+
   return result;
 }
 
-/* Obtains a value of attribute with enumerated type. In XML, enumerated
-   attributes have string as a value, and in C, they are represented as
-   values of enumerated type. This function maps the attribute onto
-   an integer value that can be immediately converted into enumerated
-   type.
-
-   First, obtains the string value of ATTR in ATTRS.
-   Then, finds the index of that value in XML_NAMES, which is a zero-terminated
-   array of strings. If found, returns the element of VALUES with that index.
-   Otherwise throws.  */
+/* Parse an integer attribute into a ULONGEST.  */
 
-int
-xml_get_enum_value (const XML_Char **attrs,
-		    const XML_Char *attr,
-		    const XML_Char **xml_names,
-		    int *values)
+void *
+gdb_xml_parse_attr_ulongest (struct gdb_xml_parser *parser,
+			     const struct gdb_xml_attribute *attribute,
+			     const char *value)
 {
-  const XML_Char *value = xml_get_required_attribute (attrs, attr);
+  ULONGEST result;
+  void *ret;
 
-  int i;
-  for (i = 0; xml_names[i]; ++i)
-    {
-      if (strcmp (xml_names[i], value) == 0)
-	return values[i];
-    }
-  throw_error (XML_PARSE_ERROR,
-	       _("Invalid enumerated value in XML: %s"), value);
+  if (xml_parse_unsigned_integer (value, &result) != 0)
+    gdb_xml_error (parser, _("Can't convert %s=\"%s\" to an integer"),
+		   attribute->name, value);
+
+  ret = xmalloc (sizeof (result));
+  memcpy (ret, &result, sizeof (result));
+  return ret;
+}
+
+/* Attribute handler for attributes restricted to a fixed list of
+   strings.  ATTRIBUTE->handler_data must point to an array of struct
+   gdb_xml_enum terminated by an entry whose NAME is NULL.  VALUE is
+   looked up in that array; a match yields a newly allocated copy of
+   the entry's value, which the caller frees with xfree.  An unknown
+   VALUE is reported through gdb_xml_error.  */
+
+void *
+gdb_xml_parse_attr_enum (struct gdb_xml_parser *parser,
+			 const struct gdb_xml_attribute *attribute,
+			 const char *value)
+{
+  const struct gdb_xml_enum *entry = attribute->handler_data;
+  void *copy;
+
+  /* Stop at the matching entry or at the terminator.  */
+  while (entry->name != NULL && strcmp (entry->name, value) != 0)
+    entry++;
+
+  if (entry->name == NULL)
+    gdb_xml_error (parser, _("Unknown attribute value %s=\"%s\""),
+		 attribute->name, value);
+
+  copy = xmalloc (sizeof (entry->value));
+  memcpy (copy, &entry->value, sizeof (entry->value));
+  return copy;
 }
 
-/* Cleanup wrapper for XML_ParserFree, with the correct type
-   for make_cleanup.  */
+#endif /* HAVE_LIBEXPAT */
 
 static void
-free_xml_parser (void *parser)
+show_debug_xml (struct ui_file *file, int from_tty,
+		struct cmd_list_element *c, const char *value)
 {
-  XML_ParserFree (parser);
+  fprintf_filtered (file, _("XML debugging is %s.\n"), value);
 }
 
-/* Register a cleanup to release PARSER.  Only the parser itself
-   is freed; another cleanup may be necessary to discard any
-   associated user data.  */
+void _initialize_xml_support (void);
 
 void
-make_cleanup_free_xml_parser (XML_Parser parser)
+_initialize_xml_support (void)
 {
-  make_cleanup (free_xml_parser, parser);
+  add_setshow_boolean_cmd ("xml", class_maintenance, &debug_xml,
+			   _("Set XML parser debugging."),
+			   _("Show XML parser debugging."),
+			   _("When set, debugging messages for XML parsers "
+			     "are displayed."),
+			   NULL, show_debug_xml,
+			   &setdebuglist, &showdebuglist);
 }
-
-#endif /* HAVE_LIBEXPAT */
Index: src/gdb/xml-support.h
===================================================================
--- src.orig/gdb/xml-support.h	2006-12-06 16:03:34.000000000 -0500
+++ src/gdb/xml-support.h	2006-12-06 16:04:11.000000000 -0500
@@ -24,22 +24,164 @@
 #ifndef XML_SUPPORT_H
 #define XML_SUPPORT_H
 
-#include "gdb_expat.h"
+#include "gdb_obstack.h"
+#include "vec.h"
 
-/* Helper functions for parsing XML documents.  See xml-support.c
-   for more information about these functions.  */
+struct gdb_xml_parser;
+struct gdb_xml_element;
+struct gdb_xml_attribute;
+
+/* A name and value pair, used to record parsed attributes.  */
+
+struct gdb_xml_value
+{
+  const char *name;  /* Name of the parsed attribute.  */
+  void *value;  /* The attribute's parsed value.  */
+};
+typedef struct gdb_xml_value gdb_xml_value_s;
+DEF_VEC_O(gdb_xml_value_s);  /* For the VEC passed to start handlers.  */
+
+/* The type of an attribute handler.
+
+   PARSER is the current XML parser, which should be used to issue any
+   debugging or error messages.  The second argument is the
+   corresponding attribute description, so that a single handler can
+   be used for multiple attributes; the attribute name is available
+   for error messages and the handler data is available for additional
+   customization (see gdb_xml_parse_attr_enum).  VALUE is the string
+   value of the attribute.
+
+   The returned value must be freeable with xfree; it is freed after
+   the start handler is called, so that handler must copy whatever it
+   keeps.  Errors should be reported by calling gdb_xml_error.  */
+
+typedef void *(gdb_xml_attribute_handler) (struct gdb_xml_parser *parser,
+					   const struct gdb_xml_attribute *,
+					   const char *value);
+
+/* Flags for attributes.  If no flags are specified, the attribute is
+   required.  */
+
+enum gdb_xml_attribute_flag
+{
+  GDB_XML_AF_NONE,  /* No flags: the attribute is required.  */
+  GDB_XML_AF_OPTIONAL = 1 << 0,  /* The attribute is optional.  */
+};
+
+/* An expected attribute and the handler to call when it is
+   encountered.  Arrays of struct gdb_xml_attribute are terminated
+   by an entry with NAME == NULL.  */
+
+struct gdb_xml_attribute
+{
+  const char *name;  /* Attribute name; NULL ends an array.  */
+  int flags;  /* enum gdb_xml_attribute_flag values.  */
+  gdb_xml_attribute_handler *handler;  /* Parses the string value.  */
+  const void *handler_data;  /* Extra data available to HANDLER.  */
+};
+
+/* Flags for elements.  If no flags are specified, the element is
+   required exactly once.  */
+
+enum gdb_xml_element_flag
+{
+  GDB_XML_EF_NONE,  /* No flags: required exactly once.  */
+  GDB_XML_EF_OPTIONAL = 1 << 0,  /* The element is optional.  */
+  GDB_XML_EF_REPEATABLE = 1 << 1,  /* The element is repeatable.  */
+};
+
+/* A handler called at the beginning of an element.
+
+   PARSER is the current XML parser, which should be used to issue any
+   debugging or error messages.  ELEMENT is the current element.
+   USER_DATA is the opaque pointer supplied when the parser was
+   created.  ATTRIBUTES is a vector of the values of any attributes
+   attached to this element.
+
+   The start handler will only be called if all the required
+   attributes were present and parsed successfully, and elements of
+   ATTRIBUTES are guaranteed to be in the same order used in
+   ELEMENT->ATTRIBUTES (not the order from the XML file).  Accordingly
+   fixed indexes into ATTRIBUTES can be used to find any non-optional
+   attributes as long as no optional attributes precede them.  */
+
+typedef void (gdb_xml_element_start_handler)
+     (struct gdb_xml_parser *parser, const struct gdb_xml_element *element,
+      void *user_data, VEC(gdb_xml_value_s) *attributes);
+
+/* A handler called at the end of an element.
+
+   PARSER, ELEMENT, and USER_DATA are as for the start handler.
+   BODY_TEXT is any accumulated body text inside the element, with
+   leading and trailing whitespace removed.  It will never be NULL.  */
+
+typedef void (gdb_xml_element_end_handler)
+     (struct gdb_xml_parser *, const struct gdb_xml_element *,
+      void *user_data, const char *body_text);
+
+/* An expected element and the handlers to call when it is
+   encountered.  Arrays of struct gdb_xml_element are terminated
+   by an entry with NAME == NULL.  */
+
+struct gdb_xml_element
+{
+  const char *name;  /* Element name; NULL ends an array.  */
+  const struct gdb_xml_attribute *attributes;  /* Expected attributes.  */
+  const struct gdb_xml_element *children;  /* Expected child elements.  */
+  int flags;  /* enum gdb_xml_element_flag values, ORed together.  */
+
+  gdb_xml_element_start_handler *start_handler;  /* At element start.  */
+  gdb_xml_element_end_handler *end_handler;  /* At element end.  */
+};
+
+/* Initialize and return a parser, and register a cleanup to destroy
+   it.  USER_DATA is the opaque pointer later given to handlers.  */
+
+struct gdb_xml_parser *gdb_xml_create_parser_and_cleanup
+  (const char *name, const struct gdb_xml_element *elements,
+   void *user_data);
+
+/* Invoke PARSER on BUFFER.  BUFFER is the data to parse, which
+   should be NUL-terminated.
+
+   Returns 0 for success or -1 for error.  The function may throw,
+   but only if something unexpected goes wrong during parsing; parse
+   errors are caught, warned about, and reported as failure (-1).  */
+
+int gdb_xml_parse (struct gdb_xml_parser *parser, const char *buffer);
+
+/* Issue a printf-style debug message from one of PARSER's handlers.  */
+
+void gdb_xml_debug (struct gdb_xml_parser *parser, const char *format, ...)
+     ATTR_FORMAT (printf, 2, 3);
+
+/* Issue a printf-style error message from one of PARSER's handlers,
+   and stop parsing.  This function does not return.  */
+
+void gdb_xml_error (struct gdb_xml_parser *parser, const char *format, ...)
+     ATTR_NORETURN ATTR_FORMAT (printf, 2, 3);
+
+/* Attribute handler which parses an integer into a ULONGEST.  */
+
+extern gdb_xml_attribute_handler gdb_xml_parse_attr_ulongest;
+
+/* Map NAME to VALUE.  A struct gdb_xml_enum * should be saved as the
+   value of handler_data when using gdb_xml_parse_attr_enum to parse a
+   fixed list of possible strings.  The list is terminated by an entry
+   with NAME == NULL.  */
+
+struct gdb_xml_enum
+{
+  const char *name;  /* Accepted string; NULL ends the list.  */
+  ULONGEST value;  /* Value that NAME maps to.  */
+};
 
-const XML_Char *xml_get_required_attribute (const XML_Char **attrs,
-					    const XML_Char *attr);
+extern gdb_xml_attribute_handler gdb_xml_parse_attr_enum;
 
-ULONGEST xml_get_integer_attribute (const XML_Char **attrs,
-				    const XML_Char *attr);
+/* Parse an integer string into a ULONGEST and return it, or call
+   gdb_xml_error if it could not be parsed.  */
 
-int xml_get_enum_value (const XML_Char **attrs,
-			const XML_Char *attr,
-			const XML_Char **xml_names,
-			int *values);
-
-void make_cleanup_free_xml_parser (XML_Parser parser);
+ULONGEST gdb_xml_parse_ulongest (struct gdb_xml_parser *parser,
+				 const char *value);
 
 #endif
Index: src/gdb/Makefile.in
===================================================================
--- src.orig/gdb/Makefile.in	2006-12-06 16:03:34.000000000 -0500
+++ src/gdb/Makefile.in	2006-12-07 10:21:15.000000000 -0500
@@ -827,7 +827,7 @@ version_h = version.h
 wince_stub_h = wince-stub.h
 wrapper_h = wrapper.h $(gdb_h)
 xcoffsolib_h = xcoffsolib.h
-xml_support_h = xml-support.h $(gdb_expat_h)
+xml_support_h = xml-support.h $(gdb_obstack_h) $(vec_h)
 xtensa_tdep_h = xtensa-tdep.h 
 
 #
@@ -2879,7 +2879,7 @@ xcoffread.o: xcoffread.c $(defs_h) $(bfd
 xcoffsolib.o: xcoffsolib.c $(defs_h) $(bfd_h) $(xcoffsolib_h) $(inferior_h) \
 	$(gdbcmd_h) $(symfile_h) $(frame_h) $(gdb_regex_h)
 xml-support.o: xml-support.c $(defs_h) $(xml_support_h) $(exceptions_h) \
-	$(gdb_string_h) $(gdb_expat_h)
+	$(gdbcmd_h) $(gdb_string_h) $(gdb_expat_h) $(safe_ctype_h)
 xstormy16-tdep.o: xstormy16-tdep.c $(defs_h) $(frame_h) $(frame_base_h) \
 	$(frame_unwind_h) $(dwarf2_frame_h) $(symtab_h) $(gdbtypes_h) \
 	$(gdbcmd_h) $(gdbcore_h) $(value_h) $(dis_asm_h) $(inferior_h) \
Index: src/gdb/doc/gdb.texinfo
===================================================================
--- src.orig/gdb/doc/gdb.texinfo	2006-12-06 16:03:34.000000000 -0500
+++ src/gdb/doc/gdb.texinfo	2006-12-07 10:21:15.000000000 -0500
@@ -16283,6 +16283,11 @@ info. The default is off.
 @item show debugvarobj
 Displays the current state of displaying @value{GDBN} variable object
 debugging info.
+@item set debug xml
+@cindex XML parser debugging
+Turns on or off debugging messages for built-in XML parsers.
+@item show debug xml
+Displays the current state of XML debugging messages.
 @end table
 
 @node Sequences



More information about the Gdb-patches mailing list