Skip to content

Commit 4da32a7

Browse files
committed
embed.fnc: Add EPTRQ hints
Generally, a pointer to a string's upper bound actually points to one beyond the actual final byte in the string. This is sanctioned by the C Standard, and allows you to just subtract the lower bound from it to get the string's length, without having to add 1. But some functions we have tolerate the upper bound pointer being set to the actual final byte. The EPTRQ hint in embed.fnc is used for those; the assertion becomes 'l <= u' instead of strictly less-than. This commit is the first to use this type of hint, and it applies it only to those functions whose documentation and behavior clearly indicate this is expected. There are a dozen or so others where that isn't true, and they need to be investigated further before deciding their disposition.
1 parent 67cb700 commit 4da32a7

File tree

7 files changed

+79
-96
lines changed

7 files changed

+79
-96
lines changed

embed.fnc

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,17 +1074,17 @@ Rp |SV * |defelem_target |NN SV *sv \
10741074
|NULLOK MAGIC *mg
10751075
: Used in op.c, perl.c
10761076
px |void |delete_eval_scope
1077-
ATdp |char * |delimcpy |NN char *to \
1078-
|NN const char *to_end \
1079-
|NN const char *from \
1080-
|NN const char *from_end \
1077+
ATdp |char * |delimcpy |SPTR char *to \
1078+
|EPTRQ const char *to_end \
1079+
|SPTR const char *from \
1080+
|EPTRQ const char *from_end \
10811081
|const int delim \
10821082
|NN I32 *retlen
10831083
ETXdp |char * |delimcpy_no_escape \
1084-
|NN char *to \
1085-
|NN const char *to_end \
1086-
|NN const char *from \
1087-
|NN const char *from_end \
1084+
|SPTR char *to \
1085+
|EPTRQ const char *to_end \
1086+
|SPTR const char *from \
1087+
|EPTRQ const char *from_end \
10881088
|const int delim \
10891089
|NN I32 *retlen
10901090
Cp |void |despatch_signals
@@ -1270,8 +1270,8 @@ Adfp |void |fatal_warner |U32 err \
12701270
|...
12711271
Adp |void |fbm_compile |NN SV *sv \
12721272
|U32 flags
1273-
ARdp |char * |fbm_instr |NN unsigned char *big \
1274-
|NN unsigned char *bigend \
1273+
ARdp |char * |fbm_instr |SPTR unsigned char *big \
1274+
|EPTRQ unsigned char *bigend \
12751275
|NN SV *littlestr \
12761276
|U32 flags
12771277
Adhp |SV * |filter_add |NULLOK filter_t funcp \
@@ -1335,8 +1335,8 @@ Adfp |char * |form |NN const char *pat \
13351335
p |void |free_tied_hv_pool
13361336
Cp |void |free_tmps
13371337
ERXp |SV * |get_and_check_backslash_N_name \
1338-
|NN const char *s \
1339-
|NN const char *e \
1338+
|SPTR const char *s \
1339+
|EPTRQ const char *e \
13401340
|const bool is_utf8 \
13411341
|NN const char **error_msg
13421342
AOdp |AV * |get_av |NN const char *name \
@@ -1733,8 +1733,8 @@ p |bool |io_close |NN IO *io \
17331733
|bool is_explicit \
17341734
|bool warn_on_fail
17351735
ARTdip |Size_t |isC9_STRICT_UTF8_CHAR \
1736-
|NN const U8 * const s0 \
1737-
|NN const U8 * const e
1736+
|SPTR const U8 * const s0 \
1737+
|EPTRQ const U8 * const e
17381738
ARTdmp |bool |is_c9strict_utf8_string \
17391739
|NN const U8 *s \
17401740
|STRLEN len
@@ -1760,8 +1760,8 @@ ARdip |bool |is_safe_syscall|NN const char *pv \
17601760
|NN const char *what \
17611761
|NN const char *op_name
17621762
ARTdip |Size_t |isSTRICT_UTF8_CHAR \
1763-
|NN const U8 * const s0 \
1764-
|NN const U8 * const e
1763+
|SPTR const U8 * const s0 \
1764+
|EPTRQ const U8 * const e
17651765
ARTdmp |bool |is_strict_utf8_string \
17661766
|NN const U8 *s \
17671767
|STRLEN len
@@ -1780,14 +1780,14 @@ CRp |bool |is_uni_perl_idcont_ \
17801780
|UV c
17811781
CRp |bool |is_uni_perl_idstart_ \
17821782
|UV c
1783-
ARTdip |Size_t |isUTF8_CHAR |NN const U8 * const s0 \
1784-
|NN const U8 * const e
1783+
ARTdip |Size_t |isUTF8_CHAR |SPTR const U8 * const s0 \
1784+
|EPTRQ const U8 * const e
17851785
ATdmp |STRLEN |is_utf8_char_buf \
17861786
|SPTR const U8 *buf \
17871787
|EPTR const U8 *buf_end
17881788
ARTdip |Size_t |isUTF8_CHAR_flags \
1789-
|NN const U8 * const s0 \
1790-
|NN const U8 * const e \
1789+
|SPTR const U8 * const s0 \
1790+
|EPTRQ const U8 * const e \
17911791
|const U32 flags
17921792
CPRTp |STRLEN |is_utf8_char_helper_ \
17931793
|SPTR const U8 * const s \
@@ -2905,10 +2905,10 @@ Admp |void |resume_compcv_and_save \
29052905
|NN struct suspended_compcv *buffer
29062906
Admp |void |resume_compcv_final \
29072907
|NN struct suspended_compcv *buffer
2908-
APTdp |char * |rninstr |NN const char *big \
2909-
|NN const char *bigend \
2910-
|NN const char *little \
2911-
|NN const char *lend
2908+
APTdp |char * |rninstr |SPTR const char *big \
2909+
|EPTRQ const char *bigend \
2910+
|SPTR const char *little \
2911+
|EPTRQ const char *lend
29122912
p |void |rpeep |NULLOK OP *o
29132913
Adipx |void |rpp_context |NN SV **mark \
29142914
|U8 gimme \
@@ -3808,14 +3808,14 @@ ARTdip |U8 * |utf8_hop_overshoot \
38083808
|MPTR const U8 *s \
38093809
|SSize_t off \
38103810
|SPTR const U8 * const start \
3811-
|NN const U8 * const end \
3811+
|EPTRQ const U8 * const end \
38123812
|NULLOK SSize_t *remaining
38133813
ARTdmp |U8 * |utf8_hop_safe |MPTR const U8 *s \
38143814
|SSize_t off \
38153815
|SPTR const U8 * const start \
3816-
|NN const U8 * const end
3817-
ARdp |STRLEN |utf8_length |NN const U8 *s0 \
3818-
|NN const U8 *e
3816+
|EPTRQ const U8 * const end
3817+
ARdp |STRLEN |utf8_length |SPTR const U8 *s0 \
3818+
|EPTRQ const U8 *e
38193819
ATdmp |UV |utf8n_to_uvchr |NN const U8 *s \
38203820
|STRLEN curlen \
38213821
|NULLOK STRLEN *retlen \
@@ -5803,20 +5803,20 @@ EWi |void |capture_clear |NN regexp *rex \
58035803
|NN const char *str
58045804
ERS |char * |find_byclass |NN regexp *prog \
58055805
|NN const regnode *c \
5806-
|NN char *s \
5807-
|NN const char *strend \
5806+
|SPTR char *s \
5807+
|EPTRQ const char *strend \
58085808
|NULLOK regmatch_info *reginfo
58095809
ERST |U8 * |find_next_masked \
5810-
|NN U8 *s \
5811-
|NN const U8 *send \
5810+
|SPTR U8 *s \
5811+
|EPTRQ const U8 *send \
58125812
|const U8 byte \
58135813
|const U8 mask
58145814
ERST |U8 * |find_span_end |SPTR U8 *s \
58155815
|EPTR const U8 *send \
58165816
|const U8 span_byte
58175817
ERST |U8 * |find_span_end_mask \
5818-
|NN U8 *s \
5819-
|NN const U8 *send \
5818+
|SPTR U8 *s \
5819+
|EPTRQ const U8 *send \
58205820
|const U8 span_byte \
58215821
|const U8 mask
58225822
Ei |I32 |foldEQ_latin1_s2_folded \
@@ -6125,10 +6125,10 @@ S |char * |force_word |NN char *start \
61256125
|int check_keyword \
61266126
|int allow_pack
61276127
RS |SV * |get_and_check_backslash_N_name_wrapper \
6128-
|NN const char *s \
6129-
|NN const char * const e
6130-
S |void |incline |NN const char *s \
6131-
|NN const char *end
6128+
|SPTR const char *s \
6129+
|EPTRQ const char * const e
6130+
S |void |incline |SPTR const char *s \
6131+
|EPTRQ const char *end
61326132
S |int |intuit_method |NN char *start \
61336133
|NULLOK SV *ioname \
61346134
|NULLOK NOCHECK CV *cv

inline.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,7 @@ Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
23292329
* But we don't know this for non-core calls, so have to retain the check for
23302330
* them. */
23312331
#ifdef PERL_CORE
2332+
//XXX See where this is used; the non-core variant can perhaps be removed.
23322333
# define PERL_NON_CORE_CHECK_EMPTY(s,e) assert((e) > (s))
23332334
#else
23342335
# define PERL_NON_CORE_CHECK_EMPTY(s,e) if ((e) <= (s)) return FALSE
@@ -3008,8 +3009,6 @@ Perl_utf8_hop_overshoot(const U8 *s, SSize_t off,
30083009
{
30093010
PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT;
30103011

3011-
assert(s <= end);
3012-
30133012
if (off >= 0) {
30143013
return utf8_hop_forward_overshoot(s, off, end, remaining);
30153014
}

proto.h

Lines changed: 18 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regexec.c

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -626,14 +626,12 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
626626
STATIC U8 *
627627
S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
628628
{
629+
PERL_ARGS_ASSERT_FIND_SPAN_END;
630+
629631
/* Returns the position of the first byte in the sequence between 's' and
630632
* 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
631633
* */
632634

633-
PERL_ARGS_ASSERT_FIND_SPAN_END;
634-
635-
assert(send >= s);
636-
637635
if ((STRLEN) (send - s) >= PERL_WORDSIZE
638636
+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
639637
- (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
@@ -700,16 +698,14 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
700698
STATIC U8 *
701699
S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
702700
{
701+
PERL_ARGS_ASSERT_FIND_NEXT_MASKED;
702+
assert((byte & mask) == byte);
703+
703704
/* Returns the position of the first byte in the sequence between 's'
704705
* and 'send-1' inclusive that when ANDed with 'mask' yields 'byte';
705706
* returns 'send' if none found. It uses word-level operations instead of
706707
* byte to speed up the process */
707708

708-
PERL_ARGS_ASSERT_FIND_NEXT_MASKED;
709-
710-
assert(send >= s);
711-
assert((byte & mask) == byte);
712-
713709
#ifndef EBCDIC
714710

715711
if ((STRLEN) (send - s) >= PERL_WORDSIZE
@@ -778,15 +774,14 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
778774
STATIC U8 *
779775
S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
780776
{
777+
PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;
778+
781779
/* Returns the position of the first byte in the sequence between 's' and
782780
* 'send-1' inclusive that when ANDed with 'mask' isn't 'span_byte'.
783781
* 'span_byte' should have been ANDed with 'mask' in the call of this
784782
* function. Returns 'send' if none found. Works like find_span_end(),
785783
* except for the AND */
786784

787-
PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;
788-
789-
assert(send >= s);
790785
assert((span_byte & mask) == span_byte);
791786

792787
if ((STRLEN) (send - s) >= PERL_WORDSIZE
@@ -2272,6 +2267,7 @@ STATIC char *
22722267
S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
22732268
const char *strend, regmatch_info *reginfo)
22742269
{
2270+
PERL_ARGS_ASSERT_FIND_BYCLASS;
22752271

22762272
/* true if x+ need not match at just the 1st pos of run of x's */
22772273
const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -2304,8 +2300,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
23042300

23052301
RXi_GET_DECL(prog,progi);
23062302

2307-
PERL_ARGS_ASSERT_FIND_BYCLASS;
2308-
23092303
/* We know what class it must start with. The case statements below have
23102304
* encoded the OP, and the UTF8ness of the target ('t8' for is UTF-8; 'tb'
23112305
* for it isn't; 'b' stands for byte), and the UTF8ness of the pattern

0 commit comments

Comments
 (0)