sasa_c_xml.c

Go to the documentation of this file.
00001 /* ---------------------------------------------------- */
00002 /* Copyright 2006-2010, CERFACS, Toulouse, France. */
00003 /* Copyright 2006-2010, Centre National de la Recherche Scientifique, Paris, France. */
00004 /* All rights reserved. Use is subject to OASIS4 license terms. */
00005 /* ---------------------------------------------------- */
00006 /*
00007    file sasa_c_xml.c
00008    C source file for sasa
00009 */
00010 
00011 
00012 #include <stdlib.h>
00013 #include <stdio.h>
00014 #include <string.h>
00015 #include <libxml/parser.h>
00016 #include <libxml/xpath.h>
00017 #include <libxml/xpathInternals.h>
00018 #include "sasa_c_xml.h"
00019 
00020 /*
00021  
00022   !DESCRIPTION:
00023  
00024    This file gathers the c interface of the xml extraction tools. 
00025    The extraction is done with the XPath technique.
00026 
00027   !FILES USED:
00028 
00029          <stdio.h>
00030          <string.h>
00031          <stdlib.h>
00032          <libxml/parser.h>
00033          <libxml/xpath.h>
00034          "sasa_c_xml.h"
00035 
00036   !REVISION HISTORY:
00037 
00038     Date      Programmer   Description
00039   ----------  ----------   -----------
00040    ??.??.??   P. Bourcier   Creation
00041    30.01.04   D. Declat     The character management has been modified.
00042                             Some routines had been deleted.
00043    21.05.07   F. Dufour     Fix possible memory access violation and minor memory leaks
00044    04.06.07   F. Dufour     Add support for namespace prefix
00045    20.06.07   F. Dufour     Add support for default namespace in xpath expression
00046    22.03.10   JM Epitalon   Simultaneous access to multiple files
00047 //EOP
00048 
00049  ----------------------------------------------------------------------
00050   $Id: sasa_c_xml.c 2399 2010-06-21 08:09:39Z coquart $
00051   $Author: coquart $
00052  ---------------------------------------------------------------------- */
00053 
00054 /* Table of pointers to open documents, indexed by the handle sasaOpenXml returns; a NULL entry is a free slot. */
00055 #define MAX_OPEN_DOCUMENTS    100
00056 xmlDocPtr open_doc[MAX_OPEN_DOCUMENTS];
00057 /* Number of open documents; -1 means the table has not been initialised yet (done on first sasaOpenXml call). */
00058 int nb_open_doc = -1;
00059 
00060 int
00061 sasaOpenXml(const char *filename)
00062 {
00063     xmlDocPtr doc;
00064     int num_doc;
00065         int i;
00066     
00067     /* Initialise table if not yet done */
00068     if (nb_open_doc == -1)
00069     {
00070         for (i = 0; i < MAX_OPEN_DOCUMENTS; i += 1)
00071             open_doc[i] = NULL;
00072         nb_open_doc = 0;
00073     }
00074 
00075     if (nb_open_doc == MAX_OPEN_DOCUMENTS)
00076     {
00077         fprintf(stderr, "%s:%d: number of open XML documents is too large.\n", 
00078                   __FILE__, __LINE__);
00079         return -1;
00080     }
00081     else
00082     {
00083         /* parse an XML file and build a tree */
00084         doc = xmlParseFile(filename);
00085         if(doc == (xmlDocPtr) NULL)
00086         {
00087             fprintf(stderr, "%s:%d: document not parsed successfully.\n", 
00088                     __FILE__, __LINE__);
00089             return -1;
00090         }
00091         else
00092         {
00093             /* Look for an empty slot in open doc table */
00094             for (num_doc = 0; open_doc[num_doc] != NULL; num_doc += 1)
00095                 ;
00096             /* store pointer to open doc into slot */
00097             open_doc[num_doc] = doc;
00098             nb_open_doc += 1;
00099             
00100             return num_doc;
00101         }
00102     }
00103 }  /* end sasaOpenXml */
00104 
00105 
00106 int
00107 sasaCloseXml(int num_doc)
00108 {
00109     xmlDocPtr doc;
00110     
00111     /* free the document */
00112     doc = open_doc[num_doc];
00113     xmlFreeDoc(doc);
00114     
00115     /* reset pointer to open doc */
00116     open_doc[num_doc] = NULL;
00117     nb_open_doc -= 1;
00118     
00119     return EXIT_SUCCESS;
00120     
00121 }  /* end sasaCloseXml */
00122 
00123 
00124 /* - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = */
00125 
00126 
00127 int
00128 getXmlNodeCount(int num_doc, const char *nodeName, int *nodeNumber)
00129 {
00130     xmlDocPtr doc;
00131     size_t length = 0;
00132     char *xpathBase = "count (//", *xpath = NULL;
00133     char *nodeNameNs = NULL;
00134     xmlNodePtr  root = NULL;
00135 
00136     xmlXPathContextPtr context    = NULL;
00137     xmlChar            *xpathExpr = NULL;
00138     xmlXPathObjectPtr  result     = NULL;
00139     
00140     doc = open_doc[num_doc];
00141     root = xmlDocGetRootElement(doc);
00142     
00143     if(root == NULL)
00144     {
00145         printf("XML File Error : No root element !\n");
00146         *nodeNumber = 0;
00147         
00148         return EXIT_FAILURE;
00149     }
00150 
00151     if(nodeName == NULL || (*nodeName) == '\0')
00152     {
00153         printf("No node name to count !\n");
00154         *nodeNumber = 0;
00155         
00156         return EXIT_FAILURE;
00157     }
00158     
00159     context = xmlXPathNewContext(doc);
00160     
00161     
00162     /* Register prefixed namespace(s) defined in the root node */
00163     if(root->ns != NULL)
00164     {
00165         xmlNsPtr ns = root->nsDef->next;
00166         
00167         xmlXPathRegisterNs(context, (const xmlChar *)root->nsDef->prefix, (const xmlChar *)root->nsDef->href);
00168         
00169         do
00170         {
00171             xmlXPathRegisterNs(context, (const xmlChar *)ns->prefix, (const xmlChar *)ns->href);
00172             
00173             ns = ns->next;
00174         }while(ns != NULL);
00175     }
00176     
00177     /* If a default namespace is defined
00178      *  
00179      * IMPORTANT: XPath 1.0 has no concept of a default namespace. Unprefixed names in XPath only match names which have no namespace.
00180      * So, if the document uses a default namespace, it is required to associate a non-empty prefix with the default namespace
00181      * via register-namespace  and add that prefix to names in XPath expressions intended to match nodes in the default namespace.
00182     */
00183 
00184     if(root->ns && !root->ns->prefix)
00185     {
00186         char *c, *start;
00187         char **nodeNameNsTab = NULL;
00188         unsigned int nb_tab  = 1, i, nb_tab_alloc = 10;
00189         
00190         xmlXPathRegisterNs(context, (const xmlChar *)"default", root->ns->href);
00191         
00192         c = start = (char *)nodeName;
00193         
00194         nodeNameNsTab = (char **)calloc(nb_tab_alloc, sizeof(char *));
00195         
00196         do
00197         {
00198             if((*c) == '/')
00199             {
00200                 nodeNameNsTab[nb_tab - 1] = (char *)calloc((c - start) + 1, 1);
00201                 strncpy(nodeNameNsTab[nb_tab - 1], (const char *)start, c - start);
00202                 if (nb_tab == nb_tab_alloc)
00203                 {
00204                     nb_tab_alloc += 10;
00205                     nodeNameNsTab = (char **)realloc(nodeNameNsTab, nb_tab_alloc * sizeof(char *));
00206                 }
00207                 nb_tab += 1;
00208                 start = c + 1;
00209             }
00210             
00211             ++c;
00212         }while((*c) != '\0');
00213         
00214         nodeNameNsTab[nb_tab - 1] = (char *)calloc((c - start) + 1, 1);
00215         strcpy(nodeNameNsTab[nb_tab - 1], (const char *)start);
00216         
00217         for(i = 0; i < nb_tab; ++i)
00218         {
00219             char *tmp = NULL;
00220             /* : and @ punctuation mark detection */
00221             unsigned int detected = 0; 
00222             c = (char *)nodeNameNsTab[i];
00223             
00224             do
00225             {
00226                 if((*c) == ':' || (*c) == '@')
00227                 {
00228                     detected = 1;
00229                     start = c + 1;
00230                 }
00231                 ++c;
00232             }while((*c) != '\0');
00233             
00234             if(!detected)
00235             {
00236                 tmp = (char *)calloc(strlen(nodeNameNsTab[i]) + 9, 1);
00237                 strcpy(tmp, "default:");
00238                 strcpy(&tmp[8], (const char *)nodeNameNsTab[i]);
00239 
00240                 free(nodeNameNsTab[i]);
00241                 nodeNameNsTab[i] = tmp;
00242             }
00243         }
00244 
00245 
00246         length = strlen(xpathBase) + nb_tab;
00247         
00248         for(i = 0; i < nb_tab; ++i)
00249         {
00250             length += strlen(nodeNameNsTab[i]);
00251             
00252         }
00253         
00254         xpath  = (char *)calloc(length + 1, 1);
00255         
00256         strcpy(xpath, xpathBase);
00257         
00258         for(i = 0; i < nb_tab; ++i)
00259         {
00260             if(i)
00261                 strcat(xpath, "/");
00262             strcat(xpath, (const char *)nodeNameNsTab[i]);
00263             free(nodeNameNsTab[i]);
00264         }
00265         
00266         free(nodeNameNsTab);
00267         
00268         strcat(xpath, ")");
00269         
00270     }
00271     else
00272     {
00273         nodeNameNs = (char *)nodeName;
00274         
00275         length = strlen(xpathBase) + strlen(nodeNameNs) + 1;
00276         xpath  = (char *) calloc(length + 1, 1);
00277         
00278         if(xpath == NULL)
00279         { 
00280             printf("%s:%d: realloc failed.\n", __FILE__, __LINE__);
00281             *nodeNumber = 0;
00282             
00283             return EXIT_FAILURE;
00284         }
00285         
00286         strcpy(xpath, xpathBase);
00287         strcat(xpath, nodeNameNs);
00288         strcat(xpath, ")");
00289     }
00290     
00291     
00292     
00293     xpathExpr = (xmlChar*) xpath;
00294     result = xmlXPathEvalExpression(xpathExpr, context);
00295     
00296     free(xpath);
00297     
00298     if(result->type != XPATH_NUMBER)
00299     {
00300         *nodeNumber = 0;
00301 #ifdef DEBUG
00302         printf("getXmlNodeCount : No result\n");
00303 #endif
00304         xmlXPathFreeObject(result);
00305         xmlXPathFreeContext(context);
00306         
00307         return EXIT_NOTFOUND;
00308     }
00309     else
00310     {
00311         *nodeNumber = (int) result->floatval;
00312 #ifdef DEBUG
00313         printf("getXmlNodeCount : nodeNumber : %d\n", *nodeNumber);
00314 #endif
00315         xmlXPathFreeObject(result);
00316         xmlXPathFreeContext(context);
00317         
00318         return EXIT_FOUND;
00319     }  
00320     
00321     
00322 }  /* end sasaGetXmlNodeCount */
00323 
00324 
00325 /* - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = */
00326 
00327 
00328 int
00329 getXmlInfo(int num_doc, char *xpathSearchString, char **xpathResultString)
00330 {
00331     xmlDocPtr doc;
00332     xmlXPathContextPtr context    = NULL;
00333     xmlChar            *xpathExpr = NULL;
00334     xmlXPathObjectPtr  result     = NULL;
00335     xmlNodeSetPtr      nodeSet    = NULL;
00336     xmlNodePtr         *curNode   = NULL;
00337     
00338     xmlNodePtr  root = NULL;
00339     doc = open_doc[num_doc];
00340     root = xmlDocGetRootElement(doc);
00341     
00342     context = xmlXPathNewContext(doc);
00343     
00344     /* Register prefixed namespace(s) defined in the root node */
00345     if(root->ns != NULL)
00346     {
00347         xmlNsPtr ns = root->nsDef->next;
00348         
00349         xmlXPathRegisterNs(context, (const xmlChar *)root->nsDef->prefix, (const xmlChar *)root->nsDef->href);
00350         
00351         do
00352         {
00353             xmlXPathRegisterNs(context, (const xmlChar *)ns->prefix, (const xmlChar *)ns->href);
00354             
00355             ns = ns->next;
00356         }while(ns != NULL);
00357     }
00358     
00359     
00360     /* If a default namespace is defined
00361      *  
00362      * IMPORTANT: XPath 1.0 has no concept of a default namespace. Unprefixed names in XPath only match names which have no namespace.
00363      * So, if the document uses a default namespace, it is required to associate a non-empty prefix with the default namespace
00364      * via register-namespace  and add that prefix to names in XPath expressions intended to match nodes in the default namespace.
00365     */
00366 
00367     if(root->ns && !root->ns->prefix)
00368     {
00369         size_t length = 0;
00370         char *c, *start;
00371         char *xpath = NULL;
00372         char **nodeNameNsTab = NULL;
00373         unsigned int nb_tab  = 1, i, nb_tab_alloc = 10;
00374         
00375         xmlXPathRegisterNs(context, (const xmlChar *)"default", root->ns->href);
00376         printf("default namespace\n");
00377         c = (char *)xpathSearchString;
00378         
00379         c += 2;
00380         start = c;
00381         
00382         nodeNameNsTab = (char **)calloc(nb_tab_alloc, sizeof(char *));
00383         
00384         do
00385         {
00386             if((*c) == '/')
00387             {
00388                 nodeNameNsTab[nb_tab - 1] = (char *)calloc((c - start) + 1, 1);
00389                 strncpy(nodeNameNsTab[nb_tab - 1], (const char *)start, c - start);
00390                 if (nb_tab == nb_tab_alloc)
00391                 {
00392                     nb_tab_alloc += 10;
00393                     nodeNameNsTab = (char **)realloc(nodeNameNsTab, nb_tab_alloc * sizeof(char *));
00394                 }
00395                 nb_tab += 1;
00396                 start = c + 1;
00397             }
00398             
00399             ++c;
00400         }while((*c) != '\0');
00401         
00402         nodeNameNsTab[nb_tab - 1] = (char *)calloc((c - start) + 1, 1);
00403         strcpy(nodeNameNsTab[nb_tab - 1], (const char *)start);
00404         
00405         for(i = 0; i < nb_tab; ++i)
00406         {
00407             char *tmp = NULL;
00408             /* : and @ punctuation mark detection */
00409             unsigned int detected = 0; 
00410             c = (char *)nodeNameNsTab[i];
00411             
00412             do
00413             {
00414                 if((*c) == ':' || (*c) == '@')
00415                 {
00416                     detected = 1;
00417                     start = c + 1;
00418                 }
00419                 ++c;
00420             }while((*c) != '\0');
00421             
00422             if(!detected)
00423             {
00424                 tmp = (char *)calloc(strlen(nodeNameNsTab[i]) + 9, 1);
00425                 strcpy(tmp, "default:");
00426                 strcpy(&tmp[8], (const char *)nodeNameNsTab[i]);
00427 
00428                 free(nodeNameNsTab[i]);
00429                 nodeNameNsTab[i] = tmp;
00430             }
00431         }
00432 
00433 
00434         length = nb_tab;
00435         
00436         for(i = 0; i < nb_tab; ++i)
00437         {
00438             length += strlen(nodeNameNsTab[i]);
00439             
00440         }
00441         
00442         xpath  = (char *)calloc(length + 3, 1);
00443         
00444         strcpy(xpath, "//");
00445         
00446         for(i = 0; i < nb_tab; ++i)
00447         {
00448             if(i)
00449                 strncat(xpath, "/", 1);
00450             strcat(xpath, (const char *)nodeNameNsTab[i]);
00451             free(nodeNameNsTab[i]);
00452         }
00453         
00454         free(nodeNameNsTab);
00455         
00456         xpathExpr = (xmlChar *)xpath;
00457     }
00458     else
00459     {
00460         xpathExpr = (xmlChar *) xpathSearchString;
00461     }
00462     
00463 #ifdef DEBUG
00464     printf("getXmlInfo : xpathExpr: %s\n", xpathExpr);
00465 #endif
00466     
00467     /* evaluate the XPath expression in the given context */
00468     result  = xmlXPathEvalExpression(xpathExpr, context);
00469     nodeSet = result->nodesetval;
00470 
00471     /* free allocated memory if xpathExpr was computed */
00472     if (xpathExpr != (xmlChar *) xpathSearchString)
00473         free(xpathExpr);
00474     
00475     if(xmlXPathNodeSetIsEmpty(nodeSet))
00476     {
00477 #ifdef DEBUG
00478       printf("getXmlInfo : No result here ! %d\n", EXIT_NOTFOUND);
00479 #endif
00480       *xpathResultString = NULL;
00481       xmlXPathFreeObject(result);
00482       xmlXPathFreeContext(context);
00483 
00484       return EXIT_NOTFOUND;
00485     }
00486     else
00487     {
00488       curNode = nodeSet->nodeTab;
00489       *xpathResultString = (char*) xmlNodeListGetString(doc, (*curNode)->children, 1);
00490 #ifdef DEBUG
00491       printf("getXmlInfo : xpathResultString: %s\n", *xpathResultString);
00492 #endif     
00493       xmlXPathFreeObject(result);
00494       xmlXPathFreeContext(context);
00495 
00496       return EXIT_FOUND;
00497     }
00498     
00499 }  /* end getXmlInfo */

Generated on 18 Mar 2011 for Oasis4 by  doxygen 1.6.1