From 2cabd116b95d67dcd9d44fcb936dee03f4fc71b9 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Wed, 28 Nov 2018 00:31:26 +0000 Subject: [PATCH] Update utfcpp to v2.3.6 Fixes #2657 Incorporates the following utfcpp patches: 1. Sass addition of `retreat`. https://github.com/nemtrif/utfcpp/pull/20 2. Fix for `replace_invalid` throwing on incomplete sequence at the end of the input. https://github.com/nemtrif/utfcpp/pull/21 --- src/utf8/checked.h | 22 ++++++++++++---------- src/utf8/core.h | 3 +++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/utf8/checked.h b/src/utf8/checked.h index 693aee964..ce9d27c8a 100644 --- a/src/utf8/checked.h +++ b/src/utf8/checked.h @@ -1,4 +1,4 @@ -// Copyright 2006 Nemanja Trifunovic +// Copyright 2006-2016 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization @@ -41,7 +41,7 @@ namespace utf8 class invalid_code_point : public exception { uint32_t cp; public: - invalid_code_point(uint32_t cp) : cp(cp) {} + invalid_code_point(uint32_t codepoint) : cp(codepoint) {} virtual const char* what() const throw() { return "Invalid code point"; } uint32_t code_point() const {return cp;} }; @@ -107,7 +107,9 @@ namespace utf8 *out++ = *it; break; case internal::NOT_ENOUGH_ROOM: - throw not_enough_room(); + out = utf8::append (replacement, out); + start = end; + break; case internal::INVALID_LEAD: out = utf8::append (replacement, out); ++start; @@ -194,10 +196,10 @@ namespace utf8 } template - void retreat (octet_iterator& it, distance_type n, octet_iterator start) + void retreat (octet_iterator& it, distance_type n, octet_iterator end) { for (distance_type i = 0; i < n; ++i) - utf8::prior(it, start); + utf8::prior(it, end); } template @@ -240,7 +242,7 @@ namespace utf8 template u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { - while (start != end) { + while (start < end) { uint32_t cp = utf8::next(start, end); if (cp > 0xffff) { //make a surrogate pair *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); @@ -264,7 +266,7 @@ namespace utf8 template u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) { - while (start != end) + while (start < end) (*result++) = utf8::next(start, end); return result; @@ -279,9 +281,9 @@ namespace utf8 public: iterator () {} explicit iterator (const octet_iterator& octet_it, - const octet_iterator& range_start, - const octet_iterator& range_end) : - it(octet_it), range_start(range_start), range_end(range_end) + const octet_iterator& rangestart, + const octet_iterator& rangeend) : + it(octet_it), range_start(rangestart), range_end(rangeend) { if (it < range_start || it > range_end) throw std::out_of_range("Invalid utf-8 iterator position"); diff --git a/src/utf8/core.h b/src/utf8/core.h index f85081f8f..927942d92 100644 --- a/src/utf8/core.h +++ b/src/utf8/core.h @@ -222,6 +222,9 @@ namespace internal template utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) { + if (it == end) + return NOT_ENOUGH_ROOM; + // Save the original value of it so we can go back in case of failure // Of course, it does not make much sense with i.e. stream iterators octet_iterator original_it = it;