From 9c5eed5b64b796095510e1dc441365d290f44585 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Oct 2021 17:04:57 -0400 Subject: [PATCH 1/4] fix: extract xpath2ruby C function and fix memory leak in xml_xpath_context.c, both functions `evaluate` and `Nokogiri_marshal_xpath_funcall_and_return_values` had code to convert xpath objects to Ruby objects. `Nokogiri_marshal_xpath_funcall_and_return_values` had a memory leak while `evaluate` did not. I've extracted this logic to a single method, and called it from both places, thereby eliminating the memory leak along with DRYing things up. Also, I've added a bit of test coverage to some of the marshalling. --- ext/nokogiri/xml_xpath_context.c | 99 ++++++++++++++++---------------- test/xml/test_xpath.rb | 12 +++- 2 files changed, 60 insertions(+), 51 deletions(-) diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index b762a6787e..93dbc4413a 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -128,11 +128,44 @@ register_variable(VALUE self, VALUE name, VALUE value) return self; } + +/* + * convert an XPath object into a Ruby object of the appropriate type. + * returns Qundef if no conversion was possible. + */ +static VALUE +xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx) +{ + VALUE retval; + + assert(xctx->doc); + assert(DOC_RUBY_OBJECT_TEST(xctx->doc)); + + switch (xobj->type) { + case XPATH_STRING: + retval = NOKOGIRI_STR_NEW2(xobj->stringval); + xmlFree(xobj->stringval); + return retval; + + case XPATH_NODESET: + return noko_xml_node_set_wrap(xobj->nodesetval, + DOC_RUBY_OBJECT(xctx->doc)); + + case XPATH_NUMBER: + return rb_float_new(xobj->floatval); + + case XPATH_BOOLEAN: + return (xobj->boolval == 1) ? Qtrue : Qfalse; + + default: + return Qundef; + } +} + void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char *function_name) { - int i; VALUE result, doc; VALUE *argv; VALUE node_set = Qnil; @@ -143,40 +176,25 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc)); argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE)); - for (i = 0 ; i < nargs ; ++i) { - rb_gc_register_address(&argv[i]); + for (int j = 0 ; j < nargs ; ++j) { + rb_gc_register_address(&argv[j]); } doc = DOC_RUBY_OBJECT(ctx->context->doc); - if (nargs > 0) { - i = nargs - 1; - do { - obj = valuePop(ctx); - switch (obj->type) { - case XPATH_STRING: - argv[i] = NOKOGIRI_STR_NEW2(obj->stringval); - break; - case XPATH_BOOLEAN: - argv[i] = obj->boolval == 1 ? Qtrue : Qfalse; - break; - case XPATH_NUMBER: - argv[i] = rb_float_new(obj->floatval); - break; - case XPATH_NODESET: - argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc); - break; - default: - argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj)); - } - xmlXPathFreeNodeSetList(obj); - } while (i-- > 0); + for (int j = nargs - 1 ; j >= 0 ; --j) { + obj = valuePop(ctx); + argv[j] = xpath2ruby(obj, ctx->context); + if (argv[j] == Qundef) { + argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj)); + } + xmlXPathFreeNodeSetList(obj); } result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv); - for (i = 0 ; i < nargs ; ++i) { - rb_gc_unregister_address(&argv[i]); + for (int j = 0 ; j < nargs ; ++j) { + rb_gc_unregister_address(&argv[j]); } free(argv); @@ -275,7 +293,7 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self) { VALUE search_path, xpath_handler; - VALUE thing = Qnil; + VALUE retval = Qnil; xmlXPathContextPtr ctx; xmlXPathObjectPtr xpath; xmlChar *query; @@ -310,31 +328,14 @@ evaluate(int argc, VALUE *argv, VALUE self) rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); } - assert(ctx->doc); - assert(DOC_RUBY_OBJECT_TEST(ctx->doc)); - - switch (xpath->type) { - case XPATH_STRING: - thing = NOKOGIRI_STR_NEW2(xpath->stringval); - xmlFree(xpath->stringval); - break; - case XPATH_NODESET: - thing = noko_xml_node_set_wrap(xpath->nodesetval, - DOC_RUBY_OBJECT(ctx->doc)); - break; - case XPATH_NUMBER: - thing = rb_float_new(xpath->floatval); - break; - case XPATH_BOOLEAN: - thing = xpath->boolval == 1 ? Qtrue : Qfalse; - break; - default: - thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc)); + retval = xpath2ruby(xpath, ctx); + if (retval == Qundef) { + retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc)); } xmlXPathFreeNodeSetList(xpath); - return thing; + return retval; } /* diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 504a30caa8..785f543461 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -76,14 +76,22 @@ def test_unknown_attribute assert_nil @xml.xpath('//employee[@id="asdfasdf"]/@fooo')[0] end - def test_boolean + def test_boolean_false assert_equal false, @xml.xpath('1 = 2') end - def test_number + def test_boolean_true + assert_equal true, @xml.xpath('1 = 1') + end + + def test_number_integer assert_equal 2, @xml.xpath('1 + 1') end + def test_number_float + assert_equal 1.5, @xml.xpath('1.5') + end + def test_string assert_equal 'foo', @xml.xpath('concat("fo", "o")') end From 620f4660100fbe654cdd3dd2d641fa5e528d1649 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 22 Oct 2021 09:08:43 -0400 Subject: [PATCH 2/4] fix: memory leak when using iconv encoding handlers --- ext/nokogiri/xml_encoding_handler.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ext/nokogiri/xml_encoding_handler.c b/ext/nokogiri/xml_encoding_handler.c index 1b7151794e..b45527596d 100644 --- a/ext/nokogiri/xml_encoding_handler.c +++ b/ext/nokogiri/xml_encoding_handler.c @@ -3,6 +3,14 @@ VALUE cNokogiriEncodingHandler; +static void +dealloc(xmlCharEncodingHandlerPtr c_handler) +{ + /* make sure iconv handlers are cleaned up and freed */ + xmlCharEncCloseFunc(c_handler); +} + + /* * call-seq: Nokogiri::EncodingHandler.[](name) * @@ -15,7 +23,7 @@ get(VALUE klass, VALUE key) handler = xmlFindCharEncodingHandler(StringValueCStr(key)); if (handler) { - return Data_Wrap_Struct(klass, NULL, NULL, handler); + return Data_Wrap_Struct(klass, NULL, dealloc, handler); } return Qnil; From 972737a223f19ff542535b2468d01d9493add6c0 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 22 Oct 2021 09:16:05 -0400 Subject: [PATCH 3/4] style: xml_encoding_handler function names --- ext/nokogiri/xml_encoding_handler.c | 30 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/ext/nokogiri/xml_encoding_handler.c b/ext/nokogiri/xml_encoding_handler.c index b45527596d..8d5b88ecd5 100644 --- a/ext/nokogiri/xml_encoding_handler.c +++ b/ext/nokogiri/xml_encoding_handler.c @@ -4,7 +4,7 @@ VALUE cNokogiriEncodingHandler; static void -dealloc(xmlCharEncodingHandlerPtr c_handler) +_xml_encoding_handler_dealloc(xmlCharEncodingHandlerPtr c_handler) { /* make sure iconv handlers are cleaned up and freed */ xmlCharEncCloseFunc(c_handler); @@ -17,64 +17,68 @@ dealloc(xmlCharEncodingHandlerPtr c_handler) * Get the encoding handler for +name+ */ static VALUE -get(VALUE klass, VALUE key) +rb_xml_encoding_handler_s_get(VALUE klass, VALUE key) { xmlCharEncodingHandlerPtr handler; handler = xmlFindCharEncodingHandler(StringValueCStr(key)); if (handler) { - return Data_Wrap_Struct(klass, NULL, dealloc, handler); + return Data_Wrap_Struct(klass, NULL, _xml_encoding_handler_dealloc, handler); } return Qnil; } + /* * call-seq: Nokogiri::EncodingHandler.delete(name) * * Delete the encoding alias named +name+ */ static VALUE -delete (VALUE klass, VALUE name) +rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name) { if (xmlDelEncodingAlias(StringValueCStr(name))) { return Qnil; } return Qtrue; } + /* * call-seq: Nokogiri::EncodingHandler.alias(from, to) * * Alias encoding handler with name +from+ to name +to+ */ static VALUE -alias(VALUE klass, VALUE from, VALUE to) +rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to) { xmlAddEncodingAlias(StringValueCStr(from), StringValueCStr(to)); return to; } + /* * call-seq: Nokogiri::EncodingHandler.clear_aliases! * * Remove all encoding aliases. */ static VALUE -clear_aliases(VALUE klass) +rb_xml_encoding_handler_s_clear_aliases(VALUE klass) { xmlCleanupEncodingAliases(); return klass; } + /* * call-seq: name * * Get the name of this EncodingHandler */ static VALUE -name(VALUE self) +rb_xml_encoding_handler_name(VALUE self) { xmlCharEncodingHandlerPtr handler; @@ -83,6 +87,7 @@ name(VALUE self) return NOKOGIRI_STR_NEW2(handler->name); } + void noko_init_xml_encoding_handler() { @@ -90,9 +95,10 @@ noko_init_xml_encoding_handler() rb_undef_alloc_func(cNokogiriEncodingHandler); - rb_define_singleton_method(cNokogiriEncodingHandler, "[]", get, 1); - rb_define_singleton_method(cNokogiriEncodingHandler, "delete", delete, 1); - rb_define_singleton_method(cNokogiriEncodingHandler, "alias", alias, 2); - rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", clear_aliases, 0); - rb_define_method(cNokogiriEncodingHandler, "name", name, 0); + rb_define_singleton_method(cNokogiriEncodingHandler, "[]", rb_xml_encoding_handler_s_get, 1); + rb_define_singleton_method(cNokogiriEncodingHandler, "delete", rb_xml_encoding_handler_s_delete, 1); + rb_define_singleton_method(cNokogiriEncodingHandler, "alias", rb_xml_encoding_handler_s_alias, 2); + rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", rb_xml_encoding_handler_s_clear_aliases, 0); + + rb_define_method(cNokogiriEncodingHandler, "name", rb_xml_encoding_handler_name, 0); } From 2e71c1960748383d246bca41f0b96199de7e6b58 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 22 Oct 2021 09:53:42 -0400 Subject: [PATCH 4/4] fix: two memory leaks in Document#canonicalize - namespaces array was not being freed - argument type errors led to output buffer not being freed Also note that I cleaned up this function a bit. --- ext/nokogiri/xml_document.c | 70 ++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index b59ea4bd84..1af62ac10e 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -533,59 +533,59 @@ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node) static VALUE rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self) { - VALUE mode; - VALUE incl_ns; - VALUE with_comments; - xmlChar **ns; - long ns_len, i; + VALUE rb_mode; + VALUE rb_namespaces; + VALUE rb_comments_p; + xmlChar **c_namespaces; - xmlDocPtr doc; - xmlOutputBufferPtr buf; - xmlC14NIsVisibleCallback cb = NULL; - void *ctx = NULL; + xmlDocPtr c_doc; + xmlOutputBufferPtr c_obuf; + xmlC14NIsVisibleCallback c_callback_wrapper = NULL; + void *rb_callback = NULL; VALUE rb_cStringIO; - VALUE io; + VALUE rb_io; - rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments); + rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p); + if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); } + if (!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); } - Data_Get_Struct(self, xmlDoc, doc); + Data_Get_Struct(self, xmlDoc, c_doc); rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO")); - io = rb_class_new_instance(0, 0, rb_cStringIO); - buf = xmlAllocOutputBuffer(NULL); + rb_io = rb_class_new_instance(0, 0, rb_cStringIO); + c_obuf = xmlAllocOutputBuffer(NULL); - buf->writecallback = (xmlOutputWriteCallback)noko_io_write; - buf->closecallback = (xmlOutputCloseCallback)noko_io_close; - buf->context = (void *)io; + c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write; + c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close; + c_obuf->context = (void *)rb_io; if (rb_block_given_p()) { - cb = block_caller; - ctx = (void *)rb_block_proc(); + c_callback_wrapper = block_caller; + rb_callback = (void *)rb_block_proc(); } - if (NIL_P(incl_ns)) { - ns = NULL; + if (NIL_P(rb_namespaces)) { + c_namespaces = NULL; } else { - Check_Type(incl_ns, T_ARRAY); - ns_len = RARRAY_LEN(incl_ns); - ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *)); - for (i = 0 ; i < ns_len ; i++) { - VALUE entry = rb_ary_entry(incl_ns, i); - ns[i] = (xmlChar *)StringValueCStr(entry); + long ns_len = RARRAY_LEN(rb_namespaces); + c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *)); + for (int j = 0 ; j < ns_len ; j++) { + VALUE entry = rb_ary_entry(rb_namespaces, j); + c_namespaces[j] = (xmlChar *)StringValueCStr(entry); } } + xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback, + (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)), + c_namespaces, + (int)RTEST(rb_comments_p), + c_obuf); - xmlC14NExecute(doc, cb, ctx, - (int)(NIL_P(mode) ? 0 : NUM2INT(mode)), - ns, - (int) RTEST(with_comments), - buf); - - xmlOutputBufferClose(buf); + free(c_namespaces); + xmlOutputBufferClose(c_obuf); - return rb_funcall(io, rb_intern("string"), 0); + return rb_funcall(rb_io, rb_intern("string"), 0); } VALUE