#include "nse_xml.h"

// NOTE(review): the two header names inside this extern "C" block were
// missing from the file as received; they are reconstructed from the libxml2
// APIs used below (htmlReadDoc / xmlReadDoc / xmlXPath*) -- confirm against
// the original file. The Lua headers are assumed to come from nse_xml.h.
extern "C" {
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
}

// Payload of the "XmlDocument" userdata: the parsed libxml2 document it owns.
// `document` is NULL once the document has been released by xmldoc_free.
typedef struct XmlDocumentData {
  xmlDocPtr document;
} XmlDocumentData;

// Wraps a parsed document in a new "XmlDocument" userdata and leaves it on
// top of the Lua stack.
//   L   - Lua state
//   doc - parsed document, or NULL when parsing failed; on success the
//         userdata takes ownership and releases it from __gc
// Returns 1 (the userdata). Raises a Lua error when doc is NULL.
// NOTE(review): if lua_newuserdata itself raises (out of memory), `doc` is
// not released; fixing that needs the allocation to happen before parsing.
int create_document( lua_State * L, xmlDocPtr doc )
{
  if ( !doc ) {
    // parsing failed
    return luaL_error( L, "parsing document failed." );
  }

  // parsing successful
  XmlDocumentData * doc_data =
      (XmlDocumentData *) lua_newuserdata( L, sizeof(XmlDocumentData) );

  // Initialise the payload before attaching the metatable, so __gc can never
  // observe an uninitialised pointer.
  doc_data->document = doc;

  // set metatable for userdata
  luaL_getmetatable( L, "XmlDocument" );
  lua_setmetatable( L, -2 );

  return 1;
}

// takes an html document as string and returns a XmlDocument
int xml_parse_html( lua_State * L )
{
  // The argument stays on the stack while it is parsed: the pointer returned
  // by luaL_checkstring is only guaranteed valid while the string is there.
  const char * doc_string = luaL_checkstring( L, 1 );

  const char * url = NULL;
  const char * encoding = NULL;
  int options = HTML_PARSE_RECOVER | HTML_PARSE_NOERROR
              | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;

  htmlDocPtr doc = htmlReadDoc( (const xmlChar *) doc_string, url, encoding, options );

  return create_document( L, doc );
}

// takes an xml document as string and returns a XmlDocument
int xml_parse_xml( lua_State * L )
{
  // See xml_parse_html: the argument must not be popped before parsing.
  const char * doc_string = luaL_checkstring( L, 1 );

  const char * url = NULL;
  const char * encoding = NULL;
  int options = XML_PARSE_RECOVER | XML_PARSE_NOERROR
              | XML_PARSE_NOWARNING | XML_PARSE_NONET;

  xmlDocPtr doc = xmlReadDoc( (const xmlChar *) doc_string, url, encoding, options );

  return create_document( L, doc );
}

// Functions exported in the "xml" library table.
static const struct luaL_reg xml_methods[] = {
  { "parse_html", xml_parse_html },
  { "parse_xml", xml_parse_xml },
  { NULL, NULL }
};

// Evaluates the XPath expression at stack index 2 against the XmlDocument at
// stack index 1. Returns a non-NULL result object that the caller must
// release with xmlXPathFreeObject. Raises a Lua error on bad arguments, on a
// released document, or when evaluation fails.
static xmlXPathObjectPtr evaluate_xpath( lua_State * L )
{
  XmlDocumentData * doc = (XmlDocumentData *) luaL_checkudata( L, 1, "XmlDocument" );
  const char * xpath = luaL_checkstring( L, 2 );

  // __gc is reachable from scripts through __index, so the document may
  // already be gone.
  if ( !doc->document ) {
    luaL_error( L, "XmlDocument has already been freed." );
  }

  xmlXPathContextPtr context = xmlXPathNewContext( doc->document );
  if ( !context ) {
    luaL_error( L, "Error creating context." );
  }

  xmlXPathObjectPtr result = xmlXPathEvalExpression( (const xmlChar *) xpath, context );
  xmlXPathFreeContext( context );
  if ( !result ) {
    luaL_error( L, "Error while evaluating XPath expression." );
  }

  return result;
}

// Pushes the string value of `node` onto the Lua stack.
static void push_node_string( lua_State * L, xmlNodePtr node )
{
  xmlChar * text = xmlXPathCastNodeToString( node );
  lua_pushstring( L, (const char *) text );
  // The string was allocated by libxml2, so it goes back through xmlFree.
  xmlFree( text );
}

// takes an xpath expression and returns the first match as string,
// or nil when nothing matched
int xmldoc_find( lua_State * L )
{
  xmlXPathObjectPtr result = evaluate_xpath( L );

  if ( xmlXPathNodeSetIsEmpty( result->nodesetval ) ) {
    // empty resultset
    lua_pushnil( L );
  } else {
    // we found something .. return first match as string
    push_node_string( L, result->nodesetval->nodeTab[0] );
  }

  // xmlXPathFreeObject releases the node set together with the result object;
  // xmlXPathFreeNodeSetList would free only the wrapper and leak the set.
  xmlXPathFreeObject( result );
  return 1;
}

// takes an xpath expression and returns the match(es) as table containing
// the string(s); the table is empty when nothing matched
int xmldoc_find_all( lua_State * L )
{
  xmlXPathObjectPtr result = evaluate_xpath( L );

  lua_newtable( L );

  if ( !xmlXPathNodeSetIsEmpty( result->nodesetval ) ) {
    // we found something
    xmlNodeSetPtr nodeset = result->nodesetval;
    for ( int i = 0; i < nodeset->nodeNr; i++ ) {
      push_node_string( L, nodeset->nodeTab[i] );
      lua_rawseti( L, -2, i + 1 );   // Lua arrays are 1-based
    }
  }

  xmlXPathFreeObject( result );
  return 1;
}

// __gc handler: releases the libxml2 document owned by the userdata.
int xmldoc_free( lua_State * L )
{
  XmlDocumentData * doc = (XmlDocumentData *) luaL_checkudata( L, 1, "XmlDocument" );

  if ( doc->document ) {
    // xmlFreeDoc releases the whole tree; plain free() would only release the
    // top-level struct and bypass libxml2's allocator.
    xmlFreeDoc( doc->document );
    // Makes a second call (e.g. an explicit doc:__gc() from a script) harmless.
    doc->document = NULL;
  }
  return 0;
}

// XmlDocument methods
static const struct luaL_reg xmldoc_methods[] = {
  { "find", xmldoc_find },
  { "find_all", xmldoc_find_all },
  { "__gc", xmldoc_free },
  { NULL, NULL }
};

// initializer function, called when library is required
int luaopen_xml( lua_State * L )
{
  // create metatable
  luaL_newmetatable( L, "XmlDocument" );

  // metatable.__index = metatable
  lua_pushvalue( L, -1 );
  lua_setfield( L, -2, "__index" );

  // register methods
  luaL_register( L, NULL, xmldoc_methods );

  // register the library table "xml"; it is left on top and returned
  luaL_register( L, "xml", xml_methods );

  return 1;
}