Main Page   Alphabetical List   Data Structures   File List   Data Fields   Globals  

xmlt.c

00001 /*
00002  *  xmlt.c - XML data structure.
00003  *           This file is part of the FreeLCD package.
00004  *
00005  *  $Id: xmlt_8c-source.html,v 1.1 2003/02/16 22:50:41 unicorn Exp $
00006  *
00007  *  This program is free software; you can redistribute it and/or modify it
00008  *  under the terms of the GNU General Public License as published by the
00009  *  Free Software Foundation; either version 2 of the License, or (at your
00010  *  option) any later version.
00011  * 
00012  *  This program is distributed in the hope that it will be useful,
00013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  *  GNU General Public License for more details.
00016  *
00017  *  You should have received a copy of the GNU General Public License
00018  *  along with this program; if not, write to the Free Software
00019  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
00020  *  MA  02111-1307  USA
00021  *
00022  *  Copyright (c) 2002, 2003, Jeroen van den Berg <unicorn@hippie.nu>
00023  */
00024 
00025 
00026 #if HAVE_CONFIG_H
00027 # include <config.h>
00028 #endif
00029 
00030 #if HAVE_STRING_H
00031 # include <string.h>
00032 #else
00033 # if HAVE_STRINGS_H
00034 #  include <strings.h>
00035 # endif
00036 #endif
00037 
00038 #include <assert.h>
00039 #include <expat.h>
00040 #include <stdio.h>
00041 
00042 #include "common/dictionary.h"
00043 #include "common/slist.h"
00044 #include "common/xmalloc.h"
00045 #include "xmlt.h"
00046 
00047 typedef struct
00048 {
00049   XML_Parser parser;            /* The Expat parser */
00050   xml_node *root_node;          /* First tag in the document is root */
00051   xml_node *curr_node;          /* Current node, used while parsing */
00052   void (*doc_finished) (void *, xml_node *);/* Callback for delivering finished
00053                                                documents */
00054   void *user_callback_data;     /* Data is passed to callback function */
00055   dictionary *known_tags;       /* The dictionary with the known tags,
00056                                    used for looking up a tag's index */
00057   dictionary *known_attributes; /* Same thing for attributes */
00058   int depth;                    /* Tree depth, used while parsing */
00059   int curr_is_cdata;            /* Working with character data at the
00060                                    moment, used while parsing */
00061   int valid;                    /* Document is still valid */
00062 }
00063 xml_context;
00064 
00065 
/*----------------------------------------------------- _cleanup_string --*/
/*  Copy 'len' bytes of 'text' into the caller-supplied buffer *_dest
 *  while normalizing whitespace: newlines, carriage returns and tabs
 *  become spaces, runs of spaces collapse into one, and leading and
 *  trailing spaces are dropped.
 *
 *  The buffer must be able to hold at least 'len' bytes.  The result is
 *  NOT null-terminated.  Returns the number of bytes stored, which is 0
 *  when 'len' is not positive or the input holds only whitespace.
 */
static size_t
_cleanup_string (const char *text, char **_dest, int len)
{
  size_t i;
  size_t count;
  size_t newlen = 0;
  char *dest;

  assert (text);
  assert (_dest && *_dest);

  /* A negative length would turn into a huge unsigned loop bound. */
  if (len <= 0)
    return 0;

  /* Only dereference _dest after it has been validated above. */
  dest = *_dest;
  count = (size_t) len;

  for (i = 0; i < count; ++i)
    {
      char c = text[i];

      if (c == '\n' || c == '\r' || c == '\t')
        c = ' ';

      /*  A space is only stored when something precedes it and that
       *  something is not a space itself; this drops leading spaces and
       *  collapses runs.
       */
      if (c != ' ' || (newlen > 0 && dest[newlen - 1] != ' '))
        dest[newlen++] = c;
    }

  /* Runs were collapsed, so at most one trailing space can remain. */
  if (newlen > 0 && dest[newlen - 1] == ' ')
    --newlen;

  return newlen;
}
00098 
00099 /*------------------------------------------------------- _attr_compare --*/
00100 static int
00101 _attr_compare (const void* data, const void* compare)
00102 {
00103   const xml_attribute *attr = (const xml_attribute*)data;
00104   int attr_t = *(int*)compare;
00105 
00106   return    attr->attribute != XML_UNKNOWN_ATTRIBUTE 
00107          && attr->attribute == attr_t;
00108 }
00109 
00110 /*----------------------------------------------------- _rescan_attribs --*/
00111 static void
00112 _rescan_attrib (void *data, void *userdata)
00113 {
00114   xml_attribute *attrib = (xml_attribute*)data; 
00115   dictionary *attr_dict = (dictionary*)userdata;
00116   int *lookup;
00117   
00118   if (attrib->attribute == XML_UNKNOWN_ATTRIBUTE)
00119     {
00120       lookup = dict_lookup (attr_dict, attrib->unknown_attrib);
00121       if (lookup != 0)
00122         {
00123           attrib->attribute = *lookup;
00124           free (attrib->unknown_attrib);
00125           attrib->unknown_attrib = 0;
00126         }
00127     }
00128 }
00129 
00130 /*------------------------------------------------------ _start_element --*/
00131 static void
00132 _start_element (void *cntxt, const char *name, const char **attr)
00133 {
00134   xml_node *newnode = xmalloc (sizeof (xml_node));
00135   xml_context *context = (xml_context *) cntxt;
00136   int *node_t;
00137 
00138   newnode->type = NODE;
00139   
00140   /*  Look up the node name in the dictionary. If it's not there, mark
00141    *  it simply as an unknown node.
00142    */
00143   node_t = dict_lookup (context->known_tags, name);
00144   if (!node_t)
00145     {
00146       newnode->tag = XML_UNKNOWN_TAG;
00147       newnode->cdata = xstrdup (name);
00148     }
00149   else
00150     {
00151       newnode->tag = *(int *) node_t;
00152       newnode->cdata = 0;
00153     }
00154 
00155   slist_init (&newnode->children);
00156   slist_init (&newnode->attributes);
00157 
00158   if (! context->root_node)
00159     {
00160       context->root_node = newnode;
00161       newnode->parent = 0;
00162     }
00163   else
00164     {
00165       newnode->parent = context->curr_node;
00166       slist_append (&context->curr_node->children, newnode);
00167     }
00168 
00169   while (attr && *attr)
00170     {
00171       xml_attribute *new_attr = xmalloc (sizeof (xml_attribute));
00172       int *attr_t = dict_lookup (context->known_attributes, *attr);
00173       
00174       if (!attr_t)
00175         {
00176           new_attr->attribute = XML_UNKNOWN_ATTRIBUTE;
00177           new_attr->unknown_attrib = xstrdup (*attr);
00178         }
00179       else
00180         {
00181           new_attr->attribute = *(int*)attr_t;
00182           new_attr->unknown_attrib = 0;
00183         }
00184       
00185       new_attr->value = xstrdup(*(attr + 1));
00186       slist_append (&newnode->attributes, new_attr);
00187       attr += 2;
00188     }
00189   
00190   context->curr_node = newnode;
00191   context->curr_is_cdata = 0;
00192   ++context->depth;
00193 }
00194 
00195 /*-------------------------------------------------------- _end_element --*/
00196 static void
00197 _end_element (void *cntxt, const char *name)
00198 {
00199   xml_context *context = (xml_context *) cntxt;
00200 
00201   --context->depth;
00202   if (!context->depth)
00203     {
00204       context->doc_finished (context->user_callback_data, context->root_node);
00205       context->curr_node = 0;
00206       context->root_node = 0;
00207     }
00208   else
00209     {
00210       context->curr_node = context->curr_node->parent;
00211       context->curr_is_cdata = 0;
00212     }
00213 }
00214 
00215 /*----------------------------------------------------- _character_data --*/
00216 static void
00217 _character_data (void *cntxt, const char *text, int len)
00218 {
00219   int         newlen;
00220   char        *newtext;
00221   xml_node    *new_node = xmalloc (sizeof (xml_node));
00222   xml_context *context = (xml_context *) cntxt;
00223   slist       *siblings;
00224 
00225   assert (cntxt);
00226   assert (text);
00227   
00228   newtext = xmalloc (len);
00229   newlen =_cleanup_string (text, &newtext, len);
00230   if (newlen == 0)
00231     {
00232       free (newtext);
00233       return;
00234     }
00235   
00236   xrealloc (newtext, newlen + 1);
00237   newtext[newlen] = 0;
00238 
00239   siblings = &(context->curr_node->children);
00240   if (context->curr_is_cdata)
00241     {
00242       /*  The parser was already handling character data. Just add this
00243        *  cdata to what was already there.
00244        */
00245       xml_node *last_node = slist_last (siblings);
00246       size_t cdata_len = strlen (last_node->cdata);
00247 
00248       last_node->cdata = xrealloc (last_node->cdata, cdata_len + newlen + 1);
00249       memcpy (last_node->cdata + cdata_len, newtext, newlen);
00250       last_node->cdata[cdata_len + cdata_len + newlen + 1] = 0;
00251     }
00252   else
00253     {
00254       new_node = xmalloc( sizeof (xml_node));
00255       new_node->type = CDATA;
00256       new_node->tag = -1;
00257       new_node->cdata = newtext;
00258       new_node->parent = context->curr_node;
00259   
00260       slist_append (siblings, new_node);
00261       context->curr_is_cdata = 1;
00262     }
00263 }
00264 
00265 /*----------------------------------------------------- _free_attribute --*/
00266 static void
00267 _free_attribute (void *_attr)
00268 {
00269   xml_attribute *attr = (xml_attribute *) _attr;
00270   assert (attr);
00271   free (attr->value);
00272   free (attr->unknown_attrib);
00273   free (attr);
00274 }
00275 
00276 
00277 /*-------------------------------------------------- xml_create_context --*/
00278 void *
00279 xmlt_create_context (void (*cb) (void *, xml_node *), void *cb_data,
00280                      dictionary * tags, dictionary * attribs)
00281 {
00282   xml_context *context = xmalloc (sizeof (xml_context));
00283 
00284   context->parser = XML_ParserCreate (0);
00285   if (!context->parser)
00286     return 0;
00287 
00288   XML_SetUserData (context->parser, context);
00289   XML_SetElementHandler (context->parser, _start_element, _end_element);
00290   XML_SetCharacterDataHandler (context->parser, _character_data);
00291 
00292   context->root_node          = 0;
00293   context->curr_node          = 0;
00294   context->user_callback_data = cb_data;
00295   context->known_tags         = tags;
00296   context->known_attributes   = attribs;
00297   context->depth              = 0;
00298   context->curr_is_cdata      = 0;
00299   context->doc_finished       = cb;
00300   context->valid              = 1;
00301 
00302   return context;
00303 }
00304 
00305 /*---------------------------------------------------------- xmlt_parse --*/
00306 int
00307 xmlt_parse (void *_context, const char *data, size_t len)
00308 {
00309   xml_context *context = (xml_context *) _context;
00310   
00311   if (!context || !context->valid)
00312     return 0;
00313 
00314   if (len == 0)
00315     return 1;
00316   
00317   assert (context->parser);
00318   assert (data);
00319 
00320   if (!XML_Parse (context->parser, data, len, 0))
00321     {
00322       context->valid = 0;
00323       return 0;
00324     }
00325 
00326   return 1;
00327 }
00328 
00329 /*--------------------------------------------------- xmlt_free_context --*/
00330 void
00331 xmlt_free_context (void *context)
00332 {
00333   assert (context);
00334   XML_ParserFree (((xml_context *) context)->parser);
00335 }
00336 
00337 /*-------------------------------------------------- xmlt_free_document --*/
00338 void
00339 xmlt_free_document (xml_node * doc)
00340 {
00341   slist_iter iter;
00342   int is_cdata = 1;
00343   void *p;
00344 
00345   assert (doc);
00346   iter = slist_begin_iter (&doc->children);
00347   slist_delete_special (&doc->attributes, _free_attribute);
00348 
00349   while (iter.curr)
00350     {
00351       p = slist_iter_and_next (&iter);
00352 
00353       if (is_cdata)
00354         free (p);
00355       else
00356         xmlt_free_document (p);
00357 
00358       /* Toggle is_cdata between 0 and 1 */
00359       is_cdata = -is_cdata + 1;
00360     }
00361 
00362   free (doc->cdata);
00363   free (doc);
00364 }
00365 
00366 /*------------------------------------------------------- xmlt_get_next --*/
00367 xml_node *
00368 xmlt_get_next (xml_node *iter, xml_node *doc)
00369 {
00370   assert (iter);
00371   assert (doc);
00372   
00373   if (iter->children.head)
00374     return (xml_node*)iter->children.head->data;
00375   
00376   while (iter->parent)
00377     {
00378       if (iter == doc)
00379         return 0;
00380 
00381       slist_iter i = slist_find (&(iter->parent->children), iter);
00382       if (i.curr->next)
00383         return (xml_node *) (i.curr->next->data);
00384 
00385       iter = iter->parent;
00386     }
00387 
00388   /* We shouldn't get here in theory, but, just in case. */
00389   return 0;
00390 }
00391 
00392 /*------------------------------------------------ xmlt_get_next_shallow --*/
00393 xml_node *
00394 xmlt_get_next_shallow (xml_node *iter)
00395 {
00396   slist_iter siblings;
00397   assert (iter);
00398   
00399   siblings = slist_find (&(iter->parent->children), iter);
00400 
00401   if (siblings.curr->next)
00402     iter = (xml_node *)(siblings.curr->next->data);
00403   else
00404     iter = 0;
00405 
00406   return iter;
00407 }
00408 
00409 /*------------------------------------------------ xmlt_rescan_document --*/
00410 void
00411 xmlt_rescan_document (xml_node * doc, dictionary * tags, dictionary * attribs)
00412 {
00413   int *tag_ptr;
00414   xml_node *curr_node  = doc;
00415 
00416   while (curr_node)
00417     {
00418       if (curr_node->type == NODE && curr_node->tag == XML_UNKNOWN_TAG)
00419         {
00420           /*  The unknown tag is stored as a string in 'cdata'.  If we
00421            *  can find it back in our own dictionary, replace the value in
00422            *  'tag' (which is XML_UNKNOWN_TAG) with the value from our
00423            *  dictionary, and free the memory that was allocated for the
00424            *  string. 
00425            */
00426           tag_ptr = dict_lookup (tags, curr_node->cdata);
00427           if (tag_ptr != 0)
00428             {
00429               curr_node->tag = *tag_ptr;
00430               free (curr_node->cdata);
00431               curr_node->cdata = 0;
00432 
00433               if (attribs)
00434                 {
00435                   slist_for_each (&curr_node->attributes, _rescan_attrib, 
00436                                   attribs);
00437                 }
00438             }
00439         }
00440 
00441       curr_node = xmlt_get_next (curr_node, doc);
00442     }
00443 }
00444 
00445 /*----------------------------------------------------------- xmlt_find --*/
00446 xml_node *
00447 xmlt_find (xml_node *doc, xml_node* iter, int tag)
00448 {
00449   if (!iter)
00450     iter = xmlt_get_next (doc, doc);
00451   else
00452     iter = xmlt_get_next_shallow (iter);
00453 
00454   while (iter && iter->tag != tag)
00455     iter = xmlt_get_next_shallow (iter);
00456   
00457   return iter;
00458 }
00459 
00460 /*-------------------------------------------------------- xmlt_find_if --*/
00461 xml_node *
00462 xmlt_find_if (xml_node *doc, xml_node* iter, int(*compare)(xml_node*))
00463 {
00464   if (!iter)
00465     iter = xmlt_get_next (doc, doc);
00466   else
00467     iter = xmlt_get_next_shallow (iter);
00468 
00469   while (iter && !compare (iter))
00470     iter = xmlt_get_next_shallow (iter);
00471   
00472   return iter;
00473 }
00474 
00475 /*----------------------------------------------------- xmlt_get_attrib --*/
00476 const char*
00477 xmlt_get_attrib (xml_node *node, int attribute)
00478 {
00479   slist_iter i = slist_find_if (&node->attributes, &attribute, _attr_compare);
00480   
00481   if(i.curr)
00482     return ((xml_attribute*)i.curr->data)->value;
00483 
00484   return 0;
00485 } 
00486 
00487 /*------------------------------------------------ xmlt_get_first_cdata --*/
00488 const char*
00489 xmlt_get_first_cdata (xml_node *node)
00490 {
00491   xml_node *first_child;
00492 
00493   if (!node)
00494     return 0;
00495   
00496   first_child = (xml_node*)node->children.head->data;
00497 
00498   return (first_child && first_child->cdata) ? first_child->cdata : 0;
00499 }
00500 
00501 
00502 #ifdef UNIT_TEST_XMLT_C
00503 
00504 /* UNIT_CFLAGS -O -lexpat */
00505 /* UNIT_EXTRA  ../common/xmalloc.c ../common/slist.c ../common/dictionary.c */
00506 
00507 void test_callback (void *userdata, xml_node *document)
00508 {
00509   *(xml_node**)userdata = document;
00510 }
00511 
00512 void
00513 test_next_tag (xml_node **doc_iter, xml_node *doc, int tag)
00514 {
00515   *doc_iter = xmlt_get_next (*doc_iter, doc);
00516   
00517   if (!doc_iter || !*doc_iter)
00518     {
00519       printf ("xmlt_get_next() failed, iterator is NULL\n");
00520       exit (1);
00521     }
00522 
00523   if ((**doc_iter).type != NODE)
00524     {
00525       printf ("xmlt_get_next() failed, got CDATA, expected NODE\n");
00526       exit (1);
00527     }
00528 
00529   if ((**doc_iter).tag != tag)
00530     {
00531       printf ("xmlt_get_next() failed, tag is %i instead of %i\n",
00532               (**doc_iter).tag, tag);
00533       exit (1);
00534     }
00535 }
00536 
00537 void
00538 test_next_cdata (xml_node **doc_iter, xml_node *doc, const char* cdata)
00539 {
00540   *doc_iter = xmlt_get_next (*doc_iter, doc);
00541   
00542   if (!doc_iter || !*doc_iter)
00543     {
00544       printf ("xmlt_get_next() failed, iterator is NULL\n");
00545       exit (1);
00546     }
00547 
00548   if ((**doc_iter).type != CDATA)
00549     {
00550       printf ("xmlt_get_next() failed, got NODE, expected CDATA\n");
00551       exit (1);
00552     }
00553 
00554   if (strcmp((**doc_iter).cdata, cdata))
00555     {
00556       printf ("xmlt_get_next() failed, cdata is '%s' instead of '%s'.\n",
00557               (**doc_iter).cdata, cdata);
00558       exit (1);
00559     }
00560 }
00561 
00562 int main (int argc, char **argv)
00563 {
00564   int tag_array[]  = { 1, 2, 3, 4 };
00565   int attr_array[] = { 10, 20, 30, 40 };
00566   xml_node *finished_doc = 0;
00567   xml_node *doc_iter;
00568   const char* attr_value;
00569   
00570   dict_pair tag_dict[] = 
00571     {
00572       { "four", &tag_array[3] },
00573       { "one", &tag_array[0] },
00574       { "three", &tag_array[2] },
00575       { "two", &tag_array[1] }
00576     };
00577   
00578   dict_pair attr_dict[] = 
00579     {
00580         { "dos", &attr_array[1] },
00581         { "quatro", &attr_array[3] },
00582         { "tres", &attr_array[2] },
00583         { "uno", &attr_array[0] }
00584     };
00585   
00586   dictionary tags  = { tag_dict , 4 };
00587   dictionary attrs = { attr_dict, 4 };
00588   
00589   int read;
00590   
00591   char *xmldata = "<one><two>Inside two</two><three uno=\"een\" dos=\"twee\">" \
00592     "<four tres=\"drie\" quatro=\"vier\">Inside four</four></three>" \
00593     "<nonexistent />Closing</one>";
00594   
00595   xml_context* context = xmlt_create_context (test_callback, &finished_doc, 
00596                                               &tags, &attrs);
00597   
00598   if (!context)
00599     {
00600       printf ("xmlt_create_context() failed\n");
00601       exit (1);
00602     }
00603 
00604   if (!xmlt_parse (context, xmldata, strlen(xmldata)))
00605     {
00606       printf ("xmlt_parse() failed\n");
00607       exit (1);
00608     }
00609 
00610   if (!finished_doc)
00611     {
00612       printf ("xmlt_parse() failed, no document returned\n");
00613       exit (1);
00614     }
00615 
00616   doc_iter = finished_doc;
00617   if (doc_iter->tag != tag_array[0])
00618     {
00619       printf ("xmlt_parse() failed, root tag is %i instead of %i\n",
00620               doc_iter->tag, tag_array[0]);
00621       exit (1);
00622     }
00623   
00624   if (doc_iter->parent != 0)
00625     {
00626       printf ("xmlt_parse() failed, first tag is not root tag\n");
00627       exit (1);
00628     }
00629   
00630   test_next_tag(&doc_iter, finished_doc, tag_array[1]);
00631   
00632   attr_value = xmlt_get_attrib (doc_iter, attr_array[0]);
00633   if (attr_value)
00634     {
00635       printf ("xmlt_get_attribute() returned something.\n");
00636       exit (1);
00637     }
00638   
00639   test_next_cdata(&doc_iter, finished_doc, "Inside two");
00640   test_next_tag(&doc_iter, finished_doc, tag_array[2]);
00641 
00642   attr_value = xmlt_get_attrib (doc_iter, attr_array[0]);
00643   if (!attr_value)
00644     {
00645       printf ("xmlt_get_attribute() didn't return anything.\n");
00646       exit (1);
00647     }
00648   
00649   if (strcmp(attr_value, "een"))
00650       {
00651         printf ("xmlt_get_attribute() returned '%s' instead of 'een'.\n",
00652                 attr_value);
00653         exit (1);
00654       }
00655       
00656   test_next_tag(&doc_iter, finished_doc, tag_array[3]);
00657   test_next_cdata(&doc_iter, finished_doc, "Inside four");
00658   
00659   
00660   return 0;
00661 }
00662 
00663 #endif

Generated on Sun Feb 16 23:39:49 2003 for FreeLCD by doxygen1.2.18