Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check number syntax is valid in hts_parse_decimal() #1400

Merged
merged 1 commit into from
Feb 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 11 additions & 15 deletions hts.c
Original file line number Diff line number Diff line change
Expand Up @@ -3482,39 +3482,32 @@ static inline long long push_digit(long long i, char c)
long long hts_parse_decimal(const char *str, char **strend, int flags)
{
long long n = 0;
int decimals = 0, e = 0, lost = 0, has_digit = 0;
int digits = 0, decimals = 0, e = 0, lost = 0;
char sign = '+', esign = '+';
const char *s;
const char *s, *str_orig = str;

while (isspace_c(*str)) str++;
s = str;

if (*s == '+' || *s == '-') sign = *s++;
while (*s)
if (isdigit_c(*s)) n = push_digit(n, *s++), has_digit = 1;
if (isdigit_c(*s)) digits++, n = push_digit(n, *s++);
else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++;
else break;

if (*s == '.') {
s++;
while (isdigit_c(*s)) decimals++, n = push_digit(n, *s++), has_digit = 1;
}

// there must have been a digit or else cannot be a valid number
if ( !has_digit )
{
if ( strend ) *strend = (char*)str;
return 0;
while (isdigit_c(*s)) decimals++, digits++, n = push_digit(n, *s++);
}

if (*s == 'E' || *s == 'e') {
switch (*s) {
case 'e': case 'E':
s++;
if (*s == '+' || *s == '-') esign = *s++;
while (isdigit_c(*s)) e = push_digit(e, *s++);
if (esign == '-') e = -e;
}
break;

switch (*s) {
case 'k': case 'K': e += 3; s++; break;
case 'm': case 'M': e += 6; s++; break;
case 'g': case 'G': e += 9; s++; break;
Expand All @@ -3529,7 +3522,10 @@ long long hts_parse_decimal(const char *str, char **strend, int flags)
}

if (strend) {
*strend = (char *)s;
// Set to the original input str pointer if not valid number syntax
*strend = (digits > 0)? (char *)s : (char *)str_orig;
} else if (digits == 0) {
hts_log_warning("Invalid numeric value %.8s[truncated]", str);
} else if (*s) {
if ((flags & HTS_PARSE_THOUSANDS_SEP) || (!(flags & HTS_PARSE_THOUSANDS_SEP) && *s != ','))
hts_log_warning("Ignoring unknown characters after %.*s[%s]", (int)(s - str), str, s);
Expand Down
24 changes: 20 additions & 4 deletions htslib/hts.h
Original file line number Diff line number Diff line change
Expand Up @@ -1137,10 +1137,26 @@ int hts_idx_nseq(const hts_idx_t *idx);
@param strend If non-NULL, set on return to point to the first character
in @a str after those forming the parsed number
@param flags Or'ed-together combination of HTS_PARSE_* flags
@return Converted value of the parsed number.

When @a strend is NULL, a warning will be printed (if hts_verbose is HTS_LOG_WARNING
or more) if there are any trailing characters after the number.
@return Integer value of the parsed number, or 0 if no valid number is found

The input string is parsed as: optional whitespace; an optional '+' or
'-' sign; decimal digits possibly including ',' characters (if @a flags
includes HTS_PARSE_THOUSANDS_SEP) and a '.' decimal point; and an optional
case-insensitive suffix, which may be either 'k', 'M', 'G', or scientific
notation consisting of 'e'/'E' followed by an optional '+' or '-' sign and
decimal digits. To be considered a valid numeric value, the main part (not
including any suffix or scientific notation) must contain at least one
digit (either before or after the decimal point).

When @a strend is NULL, @a str is expected to contain only (optional
whitespace followed by) the numeric value. A warning will be printed
(if hts_verbose is HTS_LOG_WARNING or more) if no valid parsable number
is found or if there are any unused characters after the number.

When @a strend is non-NULL, @a str starts with (optional whitespace
followed by) the numeric value. On return, @a strend is set to point
to the first unused character after the numeric value, or to @a str
if no valid parsable number is found.
*/
HTSLIB_EXPORT
long long hts_parse_decimal(const char *str, char **strend, int flags);
Expand Down
32 changes: 32 additions & 0 deletions test/sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -1655,6 +1655,37 @@ static int read_data_block(const char *in_name, samFile *fp_in,
return ret;
}

/*
 * Check one hts_parse_decimal() input in both calling modes.
 *
 * exp          - value hts_parse_decimal() is expected to return
 * str          - text handed to hts_parse_decimal()
 * exp_consumed - expected distance (in chars) from str to the returned
 *                end pointer when a non-NULL strend is supplied
 * flags        - flags argument passed through to hts_parse_decimal()
 * warning      - if non-NULL, a short label for the log message the
 *                NULL-strend call is expected to produce; it is only
 *                echoed to stderr and is not checked
 *
 * Any mismatch is reported through fail().
 */
static void test_parse_decimal1(long long exp, const char *str, size_t exp_consumed, int flags, const char *warning)
{
    char *stop;
    long long got;
    size_t used;

    if (warning != NULL) {
        fprintf(stderr, "(Expect %s message for \"%s\")\n", warning, str);
    }

    /* First pass: no end pointer requested. */
    got = hts_parse_decimal(str, NULL, flags);
    if (got != exp) {
        fail("hts_parse_decimal(\"%s\", NULL, %d) returned %lld, expected %lld", str, flags, got, exp);
    }

    /* Second pass: ask for the end pointer and verify both results. */
    got = hts_parse_decimal(str, &stop, flags);
    if (got != exp) {
        fail("hts_parse_decimal(\"%s\", ..., %d) returned %lld, expected %lld", str, flags, got, exp);
    }

    used = stop - str;
    if (used != exp_consumed) {
        fail("hts_parse_decimal(\"%s\", ..., %d) consumed %zu chars, expected %zu", str, flags, used, exp_consumed);
    }
}

/*
 * Run test_parse_decimal1() over a fixed table of inputs, in order.
 *
 * Each row holds: the expected return value, the input text, the expected
 * number of characters consumed, the flags to pass, and an optional label
 * for the log message expected when no end pointer is requested (NULL when
 * none is expected).
 */
static void test_parse_decimal(void)
{
    const struct {
        long long expected;
        const char *text;
        size_t consumed;
        int flags;
        const char *warning;
    } cases[] = {
        { 37,         "+37",                        3,  0,                       NULL },
        { -1001,      " \t -1,001x",                9,  HTS_PARSE_THOUSANDS_SEP, "trailing 'x'" },
        { LLONG_MAX,  "+9223372036854775807",       20, 0,                       NULL },
        { LLONG_MIN,  "-9,223,372,036,854,775,808", 26, HTS_PARSE_THOUSANDS_SEP, NULL },
        { 1500,       "1.5e3",                      5,  0,                       NULL },
        { 1500,       "1.5e+3k",                    6,  0,                       "trailing 'k'" },
        { 1500000000, "1.5G",                       4,  0,                       NULL },
        { 12345,      "12.345k",                    7,  0,                       NULL },
        { 12345,      "12.3456k",                   8,  0,                       "dropped fraction" },
        /* Inputs with no digits: expect 0 returned and nothing consumed. */
        { 0,          "A",                          0,  0,                       "invalid numeric" },
        { 0,          "G",                          0,  0,                       "invalid numeric" },
        { 0,          " +/-",                       0,  0,                       "invalid numeric" },
        { 0,          " \t -.e+9999",               0,  0,                       "invalid numeric" },
    };
    size_t i;

    for (i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        test_parse_decimal1(cases[i].expected, cases[i].text, cases[i].consumed,
                            cases[i].flags, cases[i].warning);
    }
}

static void test_mempolicy(void)
{
size_t bufsz = MAX_RECS * REC_LENGTH, nrecs = 0, i;
Expand Down Expand Up @@ -2194,6 +2225,7 @@ int main(int argc, char **argv)
check_cigar_tab();
check_big_ref(0);
check_big_ref(1);
test_parse_decimal();
test_mempolicy();
set_qname();
for (i = 1; i < argc; i++) faidx1(argv[i]);
Expand Down