Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use "interned" Strings when possible to avoid uneccessary heap allocation. #276

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ext/ox/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
have_func('rb_struct_alloc_noinit')
have_func('rb_obj_encoding')
have_func('rb_ivar_foreach')
have_func('rb_interned_str', 'ruby.h')
have_func('rb_interned_str_cstr', 'ruby.h')
have_func('rb_enc_interned_str', 'ruby.h')
have_func('rb_enc_interned_str_cstr', 'ruby.h')

have_header('ruby/st.h')
have_header('sys/uio.h')
Expand Down
171 changes: 165 additions & 6 deletions ext/ox/gen_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,24 +105,56 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
} else if (Yes == pi->options->sym_keys) {
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
#if HAVE_RB_ENC_INTERNED_STR_CSTR
VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc);
#else
VALUE rstr = rb_str_new2(attrs->name);

rb_enc_associate(rstr, pi->options->rb_enc);
#endif
sym = rb_funcall(rstr, ox_to_sym_id, 0);
} else {
sym = ID2SYM(rb_intern(attrs->name));
}
sym = ID2SYM(rb_intern(attrs->name));
#endif // HAVE_RB_ENC_ASSOCIATE
#if HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
rb_hash_aset(ah, sym, rb_interned_str_cstr(attrs->value));
} else {
#endif
rb_hash_aset(ah, sym, rb_str_new2(attrs->value));
#if HAVE_RB_INTERNED_STR_CSTR
}
#endif
} else {
#if HAVE_RB_ENC_ASSOCIATE
#if HAVE_ENC_RB_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
volatile VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc);
} else {
volatile VALUE rstr = rb_interned_str_cstr(attrs->name);
}
} else {
#endif
volatile VALUE rstr = rb_str_new2(attrs->name);

if (0 != pi->options->rb_enc) {
rb_enc_associate(rstr, pi->options->rb_enc);
}
#if HAVE_ENC_RB_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
#if HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
rb_hash_aset(ah, rstr, rb_interned_str_cstr(attrs->value));
} else {
#endif
rb_hash_aset(ah, rstr, rb_str_new2(attrs->value));
#if HAVE_RB_INTERNED_STR_CSTR
}
#endif
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The whole section here with the various #if checks is next to impossible to read. I think the volatile VALUE rstr statements are not used in the set and would expect a compile error under the right set of conditions. Hard to tell though. How about tolerating some code duplication in favour of readability?

#endif
}
if (0 == strcmp("encoding", attrs->name)) {
Expand Down Expand Up @@ -193,12 +225,25 @@ nomode_instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
static void
add_doctype(PInfo pi, const char *docType) {
VALUE n = rb_obj_alloc(ox_doctype_clas);
VALUE s = rb_str_new2(docType);
VALUE s;
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
s = rb_enc_interned_str_cstr(docType, pi->options->rb_enc);
} else {
s = rb_interned_str_cstr(docType);
}
} else {
#endif
s = rb_str_new2(docType);

#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar comment to the above here. Don't cross if/else with #if conditionals. I know I might have violated that in some places but I have been fixing them as I run into them.

I see the same code duplicated many times. Can this be a put into a function?

#endif
rb_ivar_set(n, ox_at_value_id, s);
if (helper_stack_empty(&pi->helpers)) { /* top level object */
Expand All @@ -210,12 +255,25 @@ add_doctype(PInfo pi, const char *docType) {
static void
add_comment(PInfo pi, const char *comment) {
VALUE n = rb_obj_alloc(ox_comment_clas);
VALUE s = rb_str_new2(comment);
VALUE s;
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
s = rb_enc_interned_str_cstr(comment, pi->options->rb_enc);
} else {
s = rb_interned_str_cstr(comment);
}
} else {
#endif
s = rb_str_new2(comment);

#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
rb_ivar_set(n, ox_at_value_id, s);
if (helper_stack_empty(&pi->helpers)) { /* top level object */
Expand All @@ -227,12 +285,25 @@ add_comment(PInfo pi, const char *comment) {
static void
add_cdata(PInfo pi, const char *cdata, size_t len) {
VALUE n = rb_obj_alloc(ox_cdata_clas);
VALUE s = rb_str_new2(cdata);
VALUE s;
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
s = rb_enc_interned_str_cstr(cdata, pi->options->rb_enc);
} else {
s = rb_interned_str_cstr(cdata);
}
} else {
#endif
s = rb_str_new2(cdata);

#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
rb_ivar_set(n, ox_at_value_id, s);
if (helper_stack_empty(&pi->helpers)) { /* top level object */
Expand All @@ -243,12 +314,25 @@ add_cdata(PInfo pi, const char *cdata, size_t len) {

static void
add_text(PInfo pi, char *text, int closed) {
VALUE s = rb_str_new2(text);
VALUE s;
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
s = rb_enc_interned_str_cstr(text, pi->options->rb_enc);
} else {
s = rb_interned_str_cstr(text);
}
} else {
#endif
s = rb_str_new2(text);

#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
if (helper_stack_empty(&pi->helpers)) { /* top level object */
create_doc(pi);
Expand Down Expand Up @@ -285,9 +369,13 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot, 0))) {
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
#if HAVE_RB_ENC_INTERNED_STR_CSTR
VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc);
#else
VALUE rstr = rb_str_new2(attrs->name);

rb_enc_associate(rstr, pi->options->rb_enc);
#endif
sym = rb_funcall(rstr, ox_to_sym_id, 0);
} else {
sym = ID2SYM(rb_intern(attrs->name));
Expand All @@ -301,18 +389,42 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
*slot = sym;
}
} else {
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
sym = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc);
} else {
sym = rb_interned_str_cstr(attrs->name);
}
} else {
#endif
sym = rb_str_new2(attrs->name);
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(sym, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
}
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
s = rb_enc_interned_str_cstr(attrs->value, pi->options->rb_enc);
} else {
s = rb_interned_str_cstr(attrs->value);
}
} else {
#endif
s = rb_str_new2(attrs->value);
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
#endif
rb_hash_aset(ah, sym, s);
}
Expand Down Expand Up @@ -343,8 +455,24 @@ end_element(PInfo pi, const char *ename) {
static void
add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
VALUE inst;
VALUE s = rb_str_new2(name);
VALUE c = Qnil;
VALUE s;
VALUE c;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please initialize with Qnil. If not interned and content is NULL then c will not be set.

#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc && 0 != content) {
s = rb_enc_interned_str_cstr(name, pi->options->rb_enc);
c = rb_enc_interned_str_cstr(content, pi->options->rb_enc);
} else if (0 != content) {
s = rb_interned_str_cstr(name);
c = rb_interned_str_cstr(content);
} else {
s = rb_interned_str_cstr(name);
c = Qnil;
}
} else {
c = Qnil;
#endif
s = rb_str_new2(name);

if (0 != content) {
c = rb_str_new2(content);
Expand All @@ -356,6 +484,9 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
rb_enc_associate(c, pi->options->rb_enc);
}
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
inst = rb_obj_alloc(ox_instruct_clas);
rb_ivar_set(inst, ox_at_value_id, s);
Expand All @@ -374,9 +505,13 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot, 0))) {
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
#if HAVE_RB_ENC_INTERNED_STR_CSTR
VALUE rstr = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc);
#else
VALUE rstr = rb_str_new2(attrs->name);

rb_enc_associate(rstr, pi->options->rb_enc);
#endif
sym = rb_funcall(rstr, ox_to_sym_id, 0);
} else {
sym = ID2SYM(rb_intern(attrs->name));
Expand All @@ -390,18 +525,42 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
*slot = sym;
}
} else {
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
sym = rb_enc_interned_str_cstr(attrs->name, pi->options->rb_enc);
} else {
sym = rb_interned_str_cstr(attrs->name);
}
} else {
#endif
sym = rb_str_new2(attrs->name);
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(sym, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
}
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
if (Yes == pi->options->intern_strings) {
if (0 != pi->options->rb_enc) {
s = rb_enc_interned_str_cstr(attrs->value, pi->options->rb_enc);
} else {
s = rb_interned_str_cstr(attrs->value);
}
} else {
#endif
s = rb_str_new2(attrs->value);
#if HAVE_RB_ENC_ASSOCIATE
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
#endif
#if HAVE_RB_ENC_INTERNED_STR_CSTR && HAVE_RB_INTERNED_STR_CSTR
}
#endif
rb_hash_aset(ah, sym, s);
}
Expand Down
Loading