Skip to content

Commit

Permalink
Implement "max waste" thresholds to avoid problems with COW and delib…
Browse files Browse the repository at this point in the history
…erately overallocated pvs

COW does not play nicely with "preallocate" algorithms.

More specifically code like sv_gets() wants to preallocate a large
buffer into $_ for performance reasons.

Prior to COW this was just fine. When someone assigned $_ to a
less volatile variable only the used portion of the buffer was copied,
and the extended buffer would be reused by sv_gets() and all was well.

With COW however this process is foiled. The preallocated buffer
get shared, and then when $_ is updated the buffer is dropped from $_,
leaving the other SV holding ownership of the overallocated buffer,
and causing sv_gets() to allocate a new buffer entirely. This process
would then repeat consuming time and lots of memory.

This patch introduces a "wastage" check to COW. When decided if we
should COW a string we look at the ratio and difference of SvCUR(sv)
and SvLEN(sv), which represent the "actual string length" and the
"allocated string length". When the difference exceeds a hard threshold,
or when the ration exceeds a designated factor then we do not COW.

This means that strings with large overallocations are not COWed.
Exactly how this works out in practice, where SvGROW() *always*
overallocates, is an open question.

See: https://rt.perl.org/Ticket/Display.html?id=121796

This patch also slightly tweaks SvGROW() not to do roundup on the first
allocation of the pv. Odds are good that the initial request realy does
want exactly what they expected. (SvGROW contrary to what the name
suggests is used for bother *extended* the size of a pv, and
initializing it the first time.)
  • Loading branch information
demerphq committed May 12, 2014
1 parent c8180b0 commit e8c6a47
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 23 deletions.
88 changes: 71 additions & 17 deletions sv.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,69 @@
char *gconvert(double, int, int, char *);
#endif

#ifdef PERL_NEW_COPY_ON_WRITE
# ifndef SV_COW_THRESHOLD
# define SV_COW_THRESHOLD 0 /* COW iff len > K */
# endif
# ifndef SV_COWBUF_THRESHOLD
# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
# endif
# ifndef SV_COW_MAX_WASTE_THRESHOLD
# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
# endif
# ifndef SV_COWBUF_WASTE_THRESHOLD
# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
# endif
# ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
# endif
# ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
# endif
#endif
/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
hold is 0. */
#if SV_COW_THRESHOLD
# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
#else
# define GE_COW_THRESHOLD(cur) 1
#endif
#if SV_COWBUF_THRESHOLD
# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
#else
# define GE_COWBUF_THRESHOLD(cur) 1
#endif
#if SV_COW_MAX_WASTE_THRESHOLD
# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
#else
# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
#endif
#if SV_COWBUF_WASTE_THRESHOLD
# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
#else
# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
#endif
#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
#else
# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
#endif
#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
#else
# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
#endif

#define CHECK_COW_THRESHOLD(cur,len) (\
GE_COW_THRESHOLD((cur)) && \
GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
)
#define CHECK_COWBUF_THRESHOLD(cur,len) (\
GE_COWBUF_THRESHOLD((cur)) && \
GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
)
/* void Gconvert: on Linux at least, gcvt (which Gconvert gets deffed to),
* has a mandatory return value, even though that value is just the same
* as the buf arg */
Expand Down Expand Up @@ -1524,7 +1587,8 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
if (newlen < minlen)
newlen = minlen;
#ifndef Perl_safesysmalloc_size
newlen = PERL_STRLEN_ROUNDUP(newlen);
if (SvLEN(sv))
newlen = PERL_STRLEN_ROUNDUP(newlen);
#endif
if (SvLEN(sv) && s) {
s = (char*)saferealloc(s, newlen);
Expand Down Expand Up @@ -3987,18 +4051,8 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr)
return;
}

/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
hold is 0. */
#if SV_COW_THRESHOLD
# define GE_COW_THRESHOLD(len) ((len) >= SV_COW_THRESHOLD)
#else
# define GE_COW_THRESHOLD(len) 1
#endif
#if SV_COWBUF_THRESHOLD
# define GE_COWBUF_THRESHOLD(len) ((len) >= SV_COWBUF_THRESHOLD)
#else
# define GE_COWBUF_THRESHOLD(len) 1
#endif



#ifdef PERL_DEBUG_READONLY_COW
# include <sys/mman.h>
Expand Down Expand Up @@ -4366,7 +4420,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags)
|| ((sflags & (SVs_PADTMP|SVf_READONLY|SVf_IsCOW))
== SVs_PADTMP
/* whose buffer is worth stealing */
&& GE_COWBUF_THRESHOLD(cur)
&& CHECK_COWBUF_THRESHOLD(cur,len)
)
) &&
!(sflags & SVf_OOK) && /* and not involved in OOK hack? */
Expand Down Expand Up @@ -4400,14 +4454,14 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags)
#elif defined(PERL_NEW_COPY_ON_WRITE)
(sflags & SVf_IsCOW
? (!len ||
( (GE_COWBUF_THRESHOLD(cur) || SvLEN(dstr) < cur+1)
( (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dstr) < cur+1)
/* If this is a regular (non-hek) COW, only so
many COW "copies" are possible. */
&& CowREFCNT(sstr) != SV_COW_REFCNT_MAX ))
: ( (sflags & CAN_COW_MASK) == CAN_COW_FLAGS
&& !(SvFLAGS(dstr) & SVf_BREAK)
&& GE_COW_THRESHOLD(cur) && cur+1 < len
&& (GE_COWBUF_THRESHOLD(cur) || SvLEN(dstr) < cur+1)
&& CHECK_COW_THRESHOLD(cur,len) && cur+1 < len
&& (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dstr) < cur+1)
))
#else
sflags & SVf_IsCOW
Expand Down
6 changes: 0 additions & 6 deletions sv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1909,12 +1909,6 @@ mg.c:1024: warning: left-hand operand of comma expression has no effect
/* Note: To allow 256 COW "copies", a refcnt of 0 means 1. */
# define CowREFCNT(sv) (*(U8 *)(SvPVX(sv)+SvLEN(sv)-1))
# define SV_COW_REFCNT_MAX ((1 << sizeof(U8)*8) - 1)
# ifndef SV_COW_THRESHOLD
# define SV_COW_THRESHOLD 0 /* min string length for cow */
# endif
# ifndef SV_COWBUF_THRESHOLD
# define SV_COWBUF_THRESHOLD 1250 /* min string length for cow */
# endif /* over existing buffer */
# define CAN_COW_MASK (SVf_POK|SVf_ROK|SVp_POK|SVf_FAKE| \
SVf_OOK|SVf_BREAK|SVf_READONLY)
# endif
Expand Down

0 comments on commit e8c6a47

Please sign in to comment.