diff --git a/ext/ox/extconf.rb b/ext/ox/extconf.rb index 6272c10f..f5c027b2 100644 --- a/ext/ox/extconf.rb +++ b/ext/ox/extconf.rb @@ -38,6 +38,10 @@ have_func('rb_struct_alloc_noinit') have_func('rb_obj_encoding') have_func('rb_ivar_foreach') +have_func('rb_interned_str', 'ruby.h') +have_func('rb_interned_str_cstr', 'ruby.h') +have_func('rb_enc_interned_str', 'ruby.h') +have_func('rb_enc_interned_str_cstr', 'ruby.h') have_header('ruby/st.h') have_header('sys/uio.h') diff --git a/ext/ox/gen_load.c b/ext/ox/gen_load.c index 210e40dd..49e32630 100644 --- a/ext/ox/gen_load.c +++ b/ext/ox/gen_load.c @@ -105,24 +105,56 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) { } else if (Yes == pi->options->sym_keys) { #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR + VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc); +#else VALUE rstr = rb_str_new2(attrs->name); rb_enc_associate(rstr, pi->options->rb_enc); +#endif sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(attrs->name)); } sym = ID2SYM(rb_intern(attrs->name)); +#endif // HAVE_RB_ENC_ASSOCIATE +#if HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + rb_hash_aset(ah, sym, rb_interned_str_cstr(attrs->value)); + } else { #endif rb_hash_aset(ah, sym, rb_str_new2(attrs->value)); +#if HAVE_RB_INTERNED_STR_CSTR + } +#endif } else { #if HAVE_RB_ENC_ASSOCIATE +#if HAVE_ENC_RB_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + volatile VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc); + } else { + volatile VALUE rstr = rb_interned_str_cstr(attrs->name); + } + } else { +#endif volatile VALUE rstr = rb_str_new2(attrs->name); if (0 != pi->options->rb_enc) { rb_enc_associate(rstr, pi->options->rb_enc); } +#if HAVE_ENC_RB_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } +#endif +#if HAVE_RB_INTERNED_STR_CSTR + if 
(Yes == pi->options->intern_strings) { + rb_hash_aset(ah, rstr, rb_interned_str_cstr(attrs->value)); + } else { +#endif rb_hash_aset(ah, rstr, rb_str_new2(attrs->value)); +#if HAVE_RB_INTERNED_STR_CSTR + } +#endif #endif } if (0 == strcmp("encoding", attrs->name)) { @@ -193,12 +225,25 @@ nomode_instruct(PInfo pi, const char *target, Attr attrs, const char *content) { static void add_doctype(PInfo pi, const char *docType) { VALUE n = rb_obj_alloc(ox_doctype_clas); - VALUE s = rb_str_new2(docType); + VALUE s; +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(docType, pi->options->rb_enc); + } else { + s = rb_interned_str_cstr(docType); + } + } else { +#endif + s = rb_str_new2(docType); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif rb_ivar_set(n, ox_at_value_id, s); if (helper_stack_empty(&pi->helpers)) { /* top level object */ @@ -210,12 +255,25 @@ add_doctype(PInfo pi, const char *docType) { static void add_comment(PInfo pi, const char *comment) { VALUE n = rb_obj_alloc(ox_comment_clas); - VALUE s = rb_str_new2(comment); + VALUE s; +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(comment, pi->options->rb_enc); + } else { + s = rb_interned_str_cstr(comment); + } + } else { +#endif + s = rb_str_new2(comment); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif rb_ivar_set(n, ox_at_value_id, s); if (helper_stack_empty(&pi->helpers)) { /* top level object */ @@ -227,12 +285,25 @@ add_comment(PInfo pi, const char *comment) { static void add_cdata(PInfo pi, const 
char *cdata, size_t len) { VALUE n = rb_obj_alloc(ox_cdata_clas); - VALUE s = rb_str_new2(cdata); + VALUE s; +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(cdata, pi->options->rb_enc); + } else { + s = rb_interned_str_cstr(cdata); + } + } else { +#endif + s = rb_str_new2(cdata); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif rb_ivar_set(n, ox_at_value_id, s); if (helper_stack_empty(&pi->helpers)) { /* top level object */ @@ -243,12 +314,25 @@ add_cdata(PInfo pi, const char *cdata, size_t len) { static void add_text(PInfo pi, char *text, int closed) { - VALUE s = rb_str_new2(text); + VALUE s; +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(text, pi->options->rb_enc); + } else { + s = rb_interned_str_cstr(text); + } + } else { +#endif + s = rb_str_new2(text); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif if (helper_stack_empty(&pi->helpers)) { /* top level object */ create_doc(pi); @@ -285,9 +369,13 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) { if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot, 0))) { #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR + VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc); +#else VALUE rstr = rb_str_new2(attrs->name); rb_enc_associate(rstr, pi->options->rb_enc); +#endif sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(attrs->name)); @@ -301,18 +389,42 @@ add_element(PInfo pi, const 
char *ename, Attr attrs, int hasChildren) { *slot = sym; } } else { +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + sym = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc); + } else { + sym = rb_interned_str_cstr(attrs->name); + } + } else { +#endif sym = rb_str_new2(attrs->name); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(sym, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(attrs->value, pi->options->rb_enc); + } else { + s = rb_interned_str_cstr(attrs->value); + } + } else { +#endif s = rb_str_new2(attrs->value); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } +#endif /* HAVE_RB_ENC_ASSOCIATE is closed first so the brace below never depends on it */ +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif rb_hash_aset(ah, sym, s); } @@ -343,8 +455,24 @@ end_element(PInfo pi, const char *ename) { static void add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) { VALUE inst; - VALUE s = rb_str_new2(name); - VALUE c = Qnil; + VALUE s; + VALUE c = Qnil; /* stays nil when the instruction has no content, on every build configuration */ +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(name, pi->options->rb_enc); /* name keeps its encoding even without content */ + if (0 != content) { + c = rb_enc_interned_str_cstr(content, pi->options->rb_enc); + } + } else { + s = rb_interned_str_cstr(name); + if (0 != content) { + c = rb_interned_str_cstr(content); + } + } + } else { +#endif + s = rb_str_new2(name); if (0 != content) { c = rb_str_new2(content); @@ -356,6 +484,9 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) { rb_enc_associate(c, 
pi->options->rb_enc); } } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif inst = rb_obj_alloc(ox_instruct_clas); rb_ivar_set(inst, ox_at_value_id, s); @@ -374,9 +505,13 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) { if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot, 0))) { #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR + VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc); +#else VALUE rstr = rb_str_new2(attrs->name); rb_enc_associate(rstr, pi->options->rb_enc); +#endif sym = rb_funcall(rstr, ox_to_sym_id, 0); } else { sym = ID2SYM(rb_intern(attrs->name)); @@ -390,18 +525,42 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) { *slot = sym; } } else { +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + sym = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc); + } else { + sym = rb_interned_str_cstr(attrs->name); + } + } else { +#endif sym = rb_str_new2(attrs->name); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(sym, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + s = rb_enc_interned_str_cstr(attrs->value, pi->options->rb_enc); + } else { + s = rb_interned_str_cstr(attrs->value); + } + } else { +#endif s = rb_str_new2(attrs->value); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(s, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif rb_hash_aset(ah, sym, s); } diff --git a/ext/ox/hash_load.c b/ext/ox/hash_load.c index 8282c8b4..39ddf4e0 100644 --- a/ext/ox/hash_load.c +++ 
b/ext/ox/hash_load.c @@ -78,11 +78,6 @@ add_str(PInfo pi, VALUE s) { Helper parent = helper_stack_peek(&pi->helpers); volatile VALUE a; -#if HAVE_RB_ENC_ASSOCIATE - if (0 != pi->options->rb_enc) { - rb_enc_associate(s, pi->options->rb_enc); - } -#endif switch (parent->type) { case NoCode: parent->obj = s; @@ -103,12 +98,48 @@ add_str(PInfo pi, VALUE s) { static void add_text(PInfo pi, char *text, int closed) { - add_str(pi, rb_str_new2(text)); +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + add_str(pi, rb_enc_interned_str_cstr(text, pi->options->rb_enc)); + } else { + add_str(pi, rb_interned_str_cstr(text)); + } + } else { +#endif + VALUE s = rb_str_new2(text); +#if HAVE_RB_ENC_ASSOCIATE + if (0 != pi->options->rb_enc) { + rb_enc_associate(s, pi->options->rb_enc); + } +#endif + add_str(pi, s); +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } +#endif /* same *_CSTR guard that opened the else-block, so the braces always balance */ } static void add_cdata(PInfo pi, const char *text, size_t len) { - add_str(pi, rb_str_new(text, len)); +#if HAVE_RB_ENC_INTERNED_STR && HAVE_RB_INTERNED_STR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + add_str(pi, rb_enc_interned_str(text, len, pi->options->rb_enc)); + } else { + add_str(pi, rb_interned_str(text, len)); + } + } else { +#endif + VALUE s = rb_str_new(text, len); +#if HAVE_RB_ENC_ASSOCIATE + if (0 != pi->options->rb_enc) { + rb_enc_associate(s, pi->options->rb_enc); + } +#endif + add_str(pi, s); +#if HAVE_RB_ENC_INTERNED_STR && HAVE_RB_INTERNED_STR + } +#endif } static void @@ -127,14 +158,30 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) { key = rb_funcall(pi->options->attr_key_mod, ox_call_id, 1, rb_str_new2(attrs->name)); } else if (Yes == pi->options->sym_keys) { key = rb_id2sym(rb_intern(attrs->name)); +#if HAVE_RB_INTERNED_STR_CSTR + } else if (Yes == pi->options->intern_strings) { + key = rb_interned_str_cstr(attrs->name); +#endif } 
else { key = rb_str_new2(attrs->name); } +#if HAVE_RB_INTERNED_STR_CSTR && HAVE_RB_ENC_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + val = rb_enc_interned_str_cstr(attrs->value, pi->options->rb_enc); + } else { + val = rb_interned_str_cstr(attrs->value); + } + } else { +#endif val = rb_str_new2(attrs->value); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(val, pi->options->rb_enc); } +#if HAVE_RB_INTERNED_STR_CSTR && HAVE_RB_ENC_INTERNED_STR_CSTR + } +#endif #endif rb_hash_aset(h, key, val); } diff --git a/ext/ox/obj_load.c b/ext/ox/obj_load.c index 75c4e294..4b8644cc 100644 --- a/ext/ox/obj_load.c +++ b/ext/ox/obj_load.c @@ -61,7 +61,15 @@ inline static VALUE str2sym(const char *str, void *encoding) { VALUE sym; -#ifdef HAVE_RUBY_ENCODING_H +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + VALUE rstr; + if (0 != encoding) { + rstr = rb_enc_interned_str_cstr(str, (rb_encoding*)encoding); + } else { + rstr = rb_interned_str_cstr(str); + } + sym = rb_funcall(rstr, ox_to_sym_id, 0); +#elif HAVE_RUBY_ENCODING_H if (0 != encoding) { VALUE rstr = rb_str_new2(str); @@ -84,7 +92,17 @@ name2var(const char *name, void *encoding) { if ('0' <= *name && *name <= '9') { var_id = INT2NUM(atoi(name)); } else if (Qundef == (var_id = ox_cache_get(ox_attr_cache, name, &slot, 0))) { -#ifdef HAVE_RUBY_ENCODING_H +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + volatile VALUE rstr; + volatile VALUE sym; + if (0 != encoding) { + rstr = rb_enc_interned_str_cstr(name, (rb_encoding*)encoding); + } else { + rstr = rb_interned_str_cstr(name); + } + sym = rb_funcall(rstr, ox_to_sym_id, 0); + var_id = SYM2ID(sym); +#elif HAVE_RUBY_ENCODING_H if (0 != encoding) { volatile VALUE rstr = rb_str_new2(name); volatile VALUE sym; @@ -192,7 +210,11 @@ parse_time(const char *text, VALUE clas) { Qnil == (t = parse_xsd_time(text, clas))) { VALUE args[1]; +#if HAVE_RB_INTERNED_STR_CSTR + *args = 
rb_interned_str_cstr(text); +#else *args = rb_str_new2(text); +#endif t = rb_funcall2(ox_time_class, ox_parse_id, 1, args); } return t; @@ -407,11 +429,23 @@ add_text(PInfo pi, char *text, int closed) { switch (h->type) { case NoCode: case StringCode: +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + h->obj = rb_enc_interned_str_cstr(text, pi->options->rb_enc); + } else { + h->obj = rb_interned_str_cstr(text); + } + } else { +#endif h->obj = rb_str_new2(text); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(h->obj, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif if (0 != pi->circ_array) { circ_array_set(pi->circ_array, h->obj, (unsigned long)pi->id); @@ -480,11 +514,23 @@ add_text(PInfo pi, char *text, int closed) { char *str = ALLOCA_N(char, str_size + 1); from_base64(text, (uchar*)str); +#if HAVE_RB_ENC_INTERNED_STR && HAVE_RB_INTERNED_STR + if (Yes == pi->options->intern_strings) { + if (0 != pi->options->rb_enc) { + v = rb_enc_interned_str(str, str_size, pi->options->rb_enc); /* length-aware: decoded bytes may contain NUL */ + } else { + v = rb_interned_str(str, str_size); /* same length as the rb_str_new() path below */ + } + } else { +#endif v = rb_str_new(str, str_size); #if HAVE_RB_ENC_ASSOCIATE if (0 != pi->options->rb_enc) { rb_enc_associate(v, pi->options->rb_enc); } +#endif +#if HAVE_RB_ENC_INTERNED_STR && HAVE_RB_INTERNED_STR + } #endif if (0 != pi->circ_array) { circ_array_set(pi->circ_array, v, (unsigned long)h->obj); @@ -525,7 +571,11 @@ add_text(PInfo pi, char *text, int closed) { h->obj = rb_cstr_to_inum(text, 10, 1); break; case BigDecimalCode: +#if HAVE_RB_INTERNED_STR_CSTR + h->obj = rb_funcall(rb_cObject, ox_bigdecimal_id, 1, rb_interned_str_cstr(text)); +#else h->obj = rb_funcall(rb_cObject, ox_bigdecimal_id, 1, rb_str_new2(text)); +#endif break; default: h->obj = Qnil; @@ -701,7 +751,15 @@ end_element(PInfo pi, const char *ename) { if (ox_empty_string 
== h->obj) { // special catch for empty strings +#if HAVE_RB_INTERNED_STR_CSTR + if (Yes == pi->options->intern_strings) { + h->obj = rb_interned_str_cstr(""); + } else { +#endif h->obj = rb_str_new2(""); +#if HAVE_RB_INTERNED_STR_CSTR + } +#endif } else if (Qundef == h->obj) { set_error(&pi->err, "Invalid element for object mode", pi->str, pi->s); return; diff --git a/ext/ox/ox.c b/ext/ox/ox.c index 2d4961fb..14e0c991 100644 --- a/ext/ox/ox.c +++ b/ext/ox/ox.c @@ -123,6 +123,7 @@ static VALUE hash_no_attrs_sym; static VALUE hash_sym; static VALUE inactive_sym; static VALUE invalid_replace_sym; +static VALUE intern_strings_sym; static VALUE limited_sym; static VALUE margin_sym; static VALUE mode_sym; @@ -175,6 +176,7 @@ struct _options ox_default_options = { NoMode, // mode StrictEffort, // effort Yes, // sym_keys + No, // intern_strings SpcSkip, // skip No, // smart true, // convert_special @@ -267,7 +269,11 @@ hints_to_overlay(Hints hints) { case ActiveOverlay: default: ov = active_sym; break; } +#if HAVE_RB_INTERNED_STR_CSTR + rb_hash_aset(overlay, rb_interned_str_cstr(h->name), ov); +#else rb_hash_aset(overlay, rb_str_new2(h->name), ov); +#endif } return overlay; } @@ -287,6 +293,7 @@ hints_to_overlay(Hints hints) { * - _:mode_ [:object|:generic|:limited|:hash|:hash_no_attrs|nil] load method to use for XML * - _:effort_ [:strict|:tolerant|:auto_define] set the tolerance level for loading * - _:symbolize_keys_ [true|false|nil] symbolize element attribute keys or leave as Strings + * - _:intern_strings_ [true|false|nil] intern (freeze and deduplicate) `String` attribute keys and `Ox::Value.as_s` * - _:element_key_mod_ [Proc|nil] converts element keys on parse if not nil * - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil * - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text @@ -314,8 +321,16 @@ get_def_opts(VALUE self) { VALUE opts = rb_hash_new(); int elen = 
(int)strlen(ox_default_options.encoding); +#if HAVE_RB_INTERNED_STR + rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_interned_str(ox_default_options.encoding, elen)); +#else rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen)); +#endif +#if HAVE_RB_INTERNED_STR + rb_hash_aset(opts, margin_sym, rb_interned_str(ox_default_options.margin, ox_default_options.margin_len)); +#else rb_hash_aset(opts, margin_sym, rb_str_new(ox_default_options.margin, ox_default_options.margin_len)); +#endif rb_hash_aset(opts, ox_indent_sym, INT2FIX(ox_default_options.indent)); rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace)); rb_hash_aset(opts, with_dtd_sym, (Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil)); @@ -324,6 +339,7 @@ get_def_opts(VALUE self) { rb_hash_aset(opts, circular_sym, (Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil)); rb_hash_aset(opts, xsd_date_sym, (Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil)); rb_hash_aset(opts, symbolize_keys_sym, (Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil)); + rb_hash_aset(opts, intern_strings_sym, (Yes == ox_default_options.intern_strings) ? Qtrue : ((No == ox_default_options.intern_strings) ? Qfalse : Qnil)); rb_hash_aset(opts, attr_key_mod_sym, ox_default_options.attr_key_mod); rb_hash_aset(opts, element_key_mod_sym, ox_default_options.element_key_mod); rb_hash_aset(opts, smart_sym, (Yes == ox_default_options.smart) ? Qtrue : ((No == ox_default_options.smart) ? 
Qfalse : Qnil)); @@ -356,14 +372,22 @@ get_def_opts(VALUE self) { if (Yes == ox_default_options.allow_invalid) { rb_hash_aset(opts, invalid_replace_sym, Qnil); } else { +#if HAVE_RB_INTERNED_STR /* guards rb_interned_str(), not the _cstr variant */ + rb_hash_aset(opts, invalid_replace_sym, rb_interned_str(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl)); +#else rb_hash_aset(opts, invalid_replace_sym, rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl)); +#endif } if ('\0' == *ox_default_options.strip_ns) { rb_hash_aset(opts, strip_namespace_sym, Qfalse); } else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) { rb_hash_aset(opts, strip_namespace_sym, Qtrue); } else { +#if HAVE_RB_INTERNED_STR /* guards rb_interned_str(), not the _cstr variant */ + rb_hash_aset(opts, strip_namespace_sym, rb_interned_str(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns))); +#else rb_hash_aset(opts, strip_namespace_sym, rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns))); +#endif } if (NULL == ox_default_options.html_hints) { //rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_hints_html())); @@ -431,6 +455,7 @@ sax_html_overlay(VALUE self) { * - _:mode_ [:object|:generic|:limited|:hash|:hash_no_attrs|nil] load method to use for XML * - _:effort_ [:strict|:tolerant|:auto_define] set the tolerance level for loading * - _:symbolize_keys_ [true|false|nil] symbolize element attribute keys or leave as Strings + * - _:intern_strings_ [true|false|nil] intern (freeze and deduplicate) `String` attribute keys and `Ox::Value.as_s` * - _:element_key_mod_ [Proc|nil] converts element keys on parse if not nil * - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil * - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text @@ -457,6 +482,7 @@ set_def_opts(VALUE self, VALUE opts) { { xsd_date_sym, &ox_default_options.xsd_date }, { circular_sym, &ox_default_options.circular }, { symbolize_keys_sym, 
&ox_default_options.sym_keys }, + { intern_strings_sym, &ox_default_options.intern_strings }, { smart_sym, &ox_default_options.smart }, { Qnil, 0 } }; @@ -788,6 +814,9 @@ load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, E if (Qnil != (v = rb_hash_lookup(h, symbolize_keys_sym))) { options.sym_keys = (Qfalse == v) ? No : Yes; } + if (Qnil != (v = rb_hash_lookup(h, intern_strings_sym))) { + options.intern_strings = (Qfalse == v) ? No : Yes; + } options.element_key_mod = rb_hash_lookup2(h, element_key_mod_sym, options.element_key_mod); options.attr_key_mod = rb_hash_lookup2(h, attr_key_mod_sym, options.attr_key_mod); @@ -928,6 +957,7 @@ load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, E * - _:auto_define_ - auto define missing classes and modules * - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent) * - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings + * - *:intern_strings* [true|false|nil] intern (freeze and deduplicate) `String` attribute keys and `Ox::Value.as_s` * - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace. * - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped. * - *:with_cdata* [true|false] if true cdata is included in hash_load output otherwise it is not. 
@@ -985,6 +1015,7 @@ load_str(int argc, VALUE *argv, VALUE self) { * - _:auto_define_ - auto define missing classes and modules * - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent) * - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings + * - *:intern_strings* [true|false|nil] intern (freeze and deduplicate) `String` attribute keys and `Ox::Value.as_s` * - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace. * - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped. */ @@ -1037,6 +1068,7 @@ load_file(int argc, VALUE *argv, VALUE self) { * - +options+ [Hash] options parse options * - *:convert_special* [true|false] flag indicating special characters like < are converted * - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names + * - *:intern_strings* [true|false] intern (freeze and deduplicate) `String` attribute keys and `Ox::Value.as_s` * - *:smart* [true|false] flag indicating the parser uses hints if available (use with html) * - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collpase white space into a single space. Default (skip space) * - *:strip_namespace* [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped. 
@@ -1046,6 +1078,7 @@ sax_parse(int argc, VALUE *argv, VALUE self) { struct _saxOptions options; options.symbolize = (No != ox_default_options.sym_keys); + options.intern_strings = (No != ox_default_options.intern_strings); options.convert_special = ox_default_options.convert_special; options.smart = (Yes == ox_default_options.smart); options.skip = ox_default_options.skip; @@ -1068,6 +1101,9 @@ sax_parse(int argc, VALUE *argv, VALUE self) { if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) { options.symbolize = (Qtrue == v); } + if (Qnil != (v = rb_hash_lookup(h, intern_strings_sym))) { + options.intern_strings = (Qtrue == v); + } if (Qnil != (v = rb_hash_lookup(h, skip_sym))) { if (skip_return_sym == v) { options.skip = CrSkip; @@ -1113,6 +1149,7 @@ sax_parse(int argc, VALUE *argv, VALUE self) { * - +options+ [Hash] options parse options * - *:convert_special* [true|false] flag indicating special characters like < are converted * - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names + * - *:intern_strings* [true|false] flag indicating Strings are frozen and interned (deduplicated) * - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white space into a single space. 
Default (skip space) * - *:overlay* [Hash] a Hash of keys that match html element names and values that are one of * - _:active_ - make the normal callback for the element @@ -1128,6 +1165,7 @@ sax_html(int argc, VALUE *argv, VALUE self) { bool free_hints = false; options.symbolize = (No != ox_default_options.sym_keys); + options.intern_strings = (No != ox_default_options.intern_strings); options.convert_special = ox_default_options.convert_special; options.smart = true; options.skip = ox_default_options.skip; @@ -1150,6 +1188,9 @@ sax_html(int argc, VALUE *argv, VALUE self) { if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) { options.symbolize = (Qtrue == v); } + if (Qnil != (v = rb_hash_lookup(h, intern_strings_sym))) { + options.intern_strings = (Qtrue == v); + } if (Qnil != (v = rb_hash_lookup(h, skip_sym))) { if (skip_return_sym == v) { options.skip = CrSkip; @@ -1189,6 +1230,7 @@ parse_dump_options(VALUE ropts, Options copts) { { with_dtd_sym, &copts->with_dtd }, { with_instruct_sym, &copts->with_instruct }, { xsd_date_sym, &copts->xsd_date }, + { intern_strings_sym, &copts->intern_strings }, { circular_sym, &copts->circular }, { Qnil, 0 } }; @@ -1295,6 +1337,7 @@ parse_dump_options(VALUE ropts, Options copts) { * - *:indent* [Fixnum] format expected * - *:no_empty* [true|false] if true don't output empty elements * - *:xsd_date* [true|false] use XSD date format if true, default: false + * - *:intern_strings* [true|false] intern (freeze and deduplicate) return Strings * - *:circular* [true|false] allow circular references, default: false * - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict * - _:strict_ - raise an NotImplementedError if an undumpable object is encountered @@ -1315,11 +1358,28 @@ dump(int argc, VALUE *argv, VALUE self) { if (0 == (xml = ox_write_obj_to_str(*argv, &copts))) { rb_raise(rb_eNoMemError, "Not enough memory.\n"); } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && 
HAVE_RB_INTERNED_STR_CSTR + if (Yes == copts.intern_strings) { + if ('\0' != *copts.encoding) { + rstr = rb_enc_interned_str_cstr(xml, rb_enc_find(copts.encoding)); + } else { + rstr = rb_interned_str_cstr(xml); + } + } else { + rstr = rb_str_new2(xml); +#if HAVE_RB_ENC_ASSOCIATE + if ('\0' != *copts.encoding) { + rb_enc_associate(rstr, rb_enc_find(copts.encoding)); + } +#endif /* HAVE_RB_ENC_ASSOCIATE */ + } /* closes the non-interned branch; kept outside the guard so braces always balance */ +#else rstr = rb_str_new2(xml); #if HAVE_RB_ENC_ASSOCIATE if ('\0' != *copts.encoding) { rb_enc_associate(rstr, rb_enc_find(copts.encoding)); } +#endif #endif xfree(xml); @@ -1334,6 +1394,7 @@ dump(int argc, VALUE *argv, VALUE self) { * - *:indent* [Fixnum] format expected * - *:no_empty* [true|false] if true don't output empty elements * - *:xsd_date* [true|false] use XSD date format if true, default: false + * - *:intern_strings* [true|false] intern (freeze and deduplicate) return Strings, default: false * - *:circular* [true|false] allow circular references, default: false * - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict * - _:strict_ - raise an NotImplementedError if an undumpable object is encountered @@ -1367,6 +1428,9 @@ static VALUE to_file(int argc, VALUE *argv, VALUE self) { struct _options copts = ox_default_options; + // enable String interning by default here since the return value is going straight into an IO + copts.intern_strings = Yes; /* Yes/No flag: it is tested with `Yes ==`, so a bool would not match */ + if (3 == argc) { parse_dump_options(argv[2], &copts); } @@ -1512,6 +1576,7 @@ void Init_ox() { hash_sym = ID2SYM(rb_intern("hash")); rb_gc_register_address(&hash_sym); inactive_sym = ID2SYM(rb_intern("inactive")); rb_gc_register_address(&inactive_sym); invalid_replace_sym = ID2SYM(rb_intern("invalid_replace")); rb_gc_register_address(&invalid_replace_sym); + intern_strings_sym = ID2SYM(rb_intern("intern_strings")); rb_gc_register_address(&intern_strings_sym); limited_sym = ID2SYM(rb_intern("limited")); rb_gc_register_address(&limited_sym); margin_sym = 
ID2SYM(rb_intern("margin")); rb_gc_register_address(&margin_sym); mode_sym = ID2SYM(rb_intern("mode")); rb_gc_register_address(&mode_sym); @@ -1545,7 +1610,11 @@ void Init_ox() { with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_gc_register_address(&with_xml_sym); xsd_date_sym = ID2SYM(rb_intern("xsd_date")); rb_gc_register_address(&xsd_date_sym); +#if HAVE_RB_INTERNED_STR_CSTR + ox_empty_string = rb_interned_str_cstr(""); rb_gc_register_address(&ox_empty_string); +#else ox_empty_string = rb_str_new2(""); rb_gc_register_address(&ox_empty_string); +#endif ox_zero_fixnum = INT2NUM(0); rb_gc_register_address(&ox_zero_fixnum); ox_sym_bank = rb_ary_new(); rb_gc_register_address(&ox_sym_bank); diff --git a/ext/ox/ox.h b/ext/ox/ox.h index fa252015..042b5128 100644 --- a/ext/ox/ox.h +++ b/ext/ox/ox.h @@ -131,6 +131,7 @@ typedef struct _options { char mode; // LoadMode char effort; // Effort char sym_keys; // symbolize keys + char intern_strings; // intern String keys and Values char skip; // skip mode char smart; // YesNo sax smart mode char convert_special;// boolean true or false diff --git a/ext/ox/sax.c b/ext/ox/sax.c index 5e94cfcc..26955fd1 100644 --- a/ext/ox/sax.c +++ b/ext/ox/sax.c @@ -92,10 +92,14 @@ str2sym(SaxDrive dr, const char *str, const char **strp) { if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) { #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding && !str_is_ascii(str)) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR + VALUE rstr = rb_enc_interned_str_cstr(str, dr->encoding); +#else VALUE rstr = rb_str_new2(str); // TBD if sym can be pinned down then use this all the time rb_enc_associate(rstr, dr->encoding); +#endif sym = rb_funcall(rstr, ox_to_sym_id, 0); *slot = Qundef; } else { @@ -108,11 +112,23 @@ str2sym(SaxDrive dr, const char *str, const char **strp) { #endif } } else { +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (Yes == dr->options.intern_strings) { + if (0 != dr->encoding) { + sym = 
rb_enc_interned_str_cstr(str, dr->encoding); + } else { + sym = rb_interned_str_cstr(str); + } + } else { +#endif sym = rb_str_new2(str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(sym, dr->encoding); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif if (0 != strp) { *strp = StringValuePtr(sym); @@ -198,7 +214,11 @@ ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t if (dr->has.error) { VALUE args[3]; +#if HAVE_RB_INTERNED_STR_CSTR + args[0] = rb_interned_str_cstr(msg); +#else args[0] = rb_str_new2(msg); +#endif args[1] = LONG2NUM(line); args[2] = LONG2NUM(col); if (dr->has.pos) { @@ -334,11 +354,19 @@ parse(SaxDrive dr) { off_t line = dr->buf.line; off_t col = dr->buf.col - 1; +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr("", dr->encoding); + } else { + args[0] = rb_interned_str_cstr(""); + } +#else args[0] = rb_str_new2(""); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } +#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); @@ -456,7 +484,11 @@ read_instruction(SaxDrive dr) { } is_xml = (0 == (dr->options.smart ? 
strcasecmp("xml", dr->buf.str) : strcmp("xml", dr->buf.str))); if (dr->has.instruct || dr->has.end_instruct) { +#if HAVE_RB_INTERNED_STR_CSTR + target = rb_interned_str_cstr(dr->buf.str); +#else target = rb_str_new2(dr->buf.str); +#endif } if (dr->has.instruct) { VALUE args[1]; @@ -492,11 +524,19 @@ read_instruction(SaxDrive dr) { if (dr->options.convert_special) { ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col); } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr(content, dr->encoding); /* same text the non-interned fallback passes */ + } else { + args[0] = rb_interned_str_cstr(content); /* same text the non-interned fallback passes */ + } +#else args[0] = rb_str_new2(content); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } +#endif #endif if (dr->has.line) { rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line)); @@ -621,7 +661,11 @@ read_doctype(SaxDrive dr) { if (dr->has.column) { rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col)); } +#if HAVE_RB_INTERNED_STR_CSTR + args[0] = rb_interned_str_cstr(dr->buf.str); +#else args[0] = rb_str_new2(dr->buf.str); +#endif rb_funcall2(dr->handler, ox_doctype_id, 1, args); } dr->buf.str = 0; @@ -694,11 +738,19 @@ read_cdata(SaxDrive dr) { if (dr->has.cdata) { VALUE args[1]; +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr(dr->buf.str, dr->encoding); + } else { + args[0] = rb_interned_str_cstr(dr->buf.str); + } +#else args[0] = rb_str_new2(dr->buf.str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } +#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); @@ -784,11 +836,19 @@ read_comment(SaxDrive dr) { if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay || (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR && 
HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr(dr->buf.str, dr->encoding); + } else { + args[0] = rb_interned_str_cstr(dr->buf.str); + } +#else args[0] = rb_str_new2(dr->buf.str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } +#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); @@ -1112,11 +1172,19 @@ read_text(SaxDrive dr) { if (dr->has.text && ((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip))) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr(dr->buf.str, dr->encoding); + } else { + args[0] = rb_interned_str_cstr(dr->buf.str); + } +#else args[0] = rb_str_new2(dr->buf.str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } +#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); @@ -1163,11 +1231,19 @@ read_text(SaxDrive dr) { default: break; } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr(dr->buf.str, dr->encoding); + } else { + args[0] = rb_interned_str_cstr(dr->buf.str); + } +#else args[0] = rb_str_new2(dr->buf.str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } +#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); @@ -1245,11 +1321,19 @@ read_jump(SaxDrive dr, const char *pat) { } // TBD check parent overlay if (dr->has.text && !dr->blocked) { +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[0] = rb_enc_interned_str_cstr(dr->buf.str, dr->encoding); + } else { + args[0] = rb_interned_str_cstr(dr->buf.str); + } +#else args[0] = rb_str_new2(dr->buf.str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[0], dr->encoding); } 
+#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); @@ -1351,11 +1435,19 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, if (dr->options.convert_special) { ox_sax_collapse_special(dr, dr->buf.str, pos, line, col); } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (0 != dr->encoding) { + args[1] = rb_enc_interned_str_cstr(attr_value, dr->encoding); + } else { + args[1] = rb_interned_str_cstr(attr_value); + } +#else args[1] = rb_str_new2(attr_value); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(args[1], dr->encoding); } +#endif #endif if (dr->has.pos) { rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos)); diff --git a/ext/ox/sax.h b/ext/ox/sax.h index 90498541..717295bd 100644 --- a/ext/ox/sax.h +++ b/ext/ox/sax.h @@ -16,6 +16,7 @@ typedef struct _saxOptions { int symbolize; + int intern_strings; int convert_special; int smart; SkipMode skip; diff --git a/ext/ox/sax_as.c b/ext/ox/sax_as.c index 897335a6..12c0df3a 100644 --- a/ext/ox/sax_as.c +++ b/ext/ox/sax_as.c @@ -135,11 +135,23 @@ sax_value_as_s(VALUE self) { default: break; } +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + if (dr->options.intern_strings) { + if (0 != dr->encoding) { + rs = rb_enc_interned_str_cstr(dr->buf.str, dr->encoding); + } else { + rs = rb_interned_str_cstr(dr->buf.str); + } + } else { +#endif rs = rb_str_new2(dr->buf.str); #if HAVE_RB_ENC_ASSOCIATE if (0 != dr->encoding) { rb_enc_associate(rs, dr->encoding); } +#endif +#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR + } #endif return rs; } @@ -223,7 +235,11 @@ sax_value_as_time(VALUE self) { VALUE args[1]; /*printf("**** time parse\n"); */ +#if HAVE_RB_INTERNED_STR_CSTR + *args = rb_interned_str_cstr(str); +#else *args = rb_str_new2(str); +#endif t = rb_funcall2(ox_time_class, ox_parse_id, 1, args); } return t; diff --git a/test/tests.rb b/test/tests.rb index 2f33608c..204fdf47 100755 --- 
a/test/tests.rb +++ b/test/tests.rb @@ -44,6 +44,7 @@ :invalid_replace=>'', :strip_namespace=>false, :overlay=>nil, + :intern_strings=>false, } $ox_generic_options = { @@ -69,6 +70,7 @@ :invalid_replace=>'', :strip_namespace=>false, :overlay=>nil, + :intern_strings=>false, } class Func < ::Test::Unit::TestCase @@ -108,6 +110,7 @@ def test_set_options :invalid_replace=>'*', :strip_namespace=>'spaced', :overlay=>nil, + :intern_strings=>false, } o3 = { :xsd_date=>false } Ox.default_options = o2 @@ -434,7 +437,11 @@ def test_escape_special Ox::default_options = $ox_object_options xml = %{\nπ\n} doc = Ox.parse(xml) - assert_equal('π', doc.attributes[:name].force_encoding('UTF-8')) + if $ox_object_options[:intern_strings] + assert_equal('π', doc.attributes[:name].dup.force_encoding('UTF-8')) + else + assert_equal('π', doc.attributes[:name].force_encoding('UTF-8')) + end end def test_escape_dump_tolerant