Skip to content

Commit

Permalink
Fix GetCharacterAndWidth() and CharacterAfter() bugs for DBCS code pages
Browse files: browse the repository at this point in the history
  • Loading branch information
zufuliu committed Jun 23, 2021
1 parent 9164ccd commit 3e7741f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 48 deletions.
1 change: 1 addition & 0 deletions scintilla/lexers/LexBatch.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ void ColouriseBatchDoc(Sci_PositionU startPos, Sci_Position length, int initStyl
varQuoteChar = '\0';
sc.ChangeState(outerStyle);
sc.Rewind();
sc.Forward();
}
}
if (varQuoteChar == '\0') {
Expand Down
32 changes: 12 additions & 20 deletions scintilla/lexlib/StyleContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,14 @@ class StyleContext final {
int chPrev;
int ch;
int chNext;
Sci_Position width;
Sci_Position widthNext;
Sci_Position width = 1;
Sci_Position widthNext = 1;

StyleContext(Sci_PositionU startPos, Sci_PositionU length,
int initStyle, LexAccessor &styler_) noexcept :
styler(styler_),
endPos(startPos + length),
lengthDocument(styler.Length()),
currentPos(startPos),
multiByteAccess(styler.Encoding() == EncodingType::dbcs),
state(initStyle) {
styler.StartAt(startPos);
Expand All @@ -70,18 +69,7 @@ class StyleContext final {
}
lineDocEnd = styler.GetLine(lengthDocument);
atLineStart = static_cast<Sci_PositionU>(styler.LineStart(currentLine)) == startPos;

chPrev = 0;
width = 1;
widthNext = 1;
if (!multiByteAccess) {
ch = static_cast<unsigned char>(styler[startPos]);
} else {
ch = styler.GetCharacterAndWidth(startPos, &widthNext);
width = widthNext;
}

GetNextChar();
SeekTo(startPos);
}
// Deleted so StyleContext objects can not be copied.
StyleContext(const StyleContext &) = delete;
Expand Down Expand Up @@ -234,16 +222,20 @@ class StyleContext final {
styler.GetRangeLowered(styler.GetStartSegment(), currentPos, s, len);
}

void Rewind() noexcept {
currentPos = styler.GetStartSegment();
void SeekTo(Sci_PositionU startPos) noexcept {
currentPos = startPos;
chPrev = 0;
if (!multiByteAccess) {
ch = static_cast<unsigned char>(styler[currentPos]);
ch = static_cast<unsigned char>(styler[startPos]);
} else {
ch = styler.GetCharacterAndWidth(currentPos, &widthNext);
ch = styler.GetCharacterAndWidth(startPos, &widthNext);
width = widthNext;
}
Forward();
GetNextChar();
}

void Rewind() noexcept {
SeekTo(styler.GetStartSegment());
}

bool LineEndsWith(char ch0) const noexcept {
Expand Down
52 changes: 24 additions & 28 deletions scintilla/src/Document.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co
return CharacterExtracted(unicodeReplacementChar, 0);
}
const unsigned char leadByte = cb.UCharAt(position);
if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
if (UTF8IsAscii(leadByte) || !dbcsCodePage) {
// Common case: ASCII character
return CharacterExtracted(leadByte, 1);
}
Expand All @@ -915,10 +915,12 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co
}
} else {
if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < Length())) {
return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1));
} else {
return CharacterExtracted(leadByte, 1);
const unsigned char trailByte = cb.UCharAt(position + 1);
if (!IsDBCSTrailByteInvalid(trailByte)) {
return CharacterExtracted::DBCS(leadByte, trailByte);
}
}
return CharacterExtracted(leadByte, 1);
}
}

Expand Down Expand Up @@ -1006,39 +1008,33 @@ Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sc
}

int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const noexcept {
int character;
int bytesInCharacter = 1;
const unsigned char leadByte = cb.UCharAt(position);
if (dbcsCodePage) {
int character = leadByte;
if (!UTF8IsAscii(leadByte) && dbcsCodePage) {
if (CpUtf8 == dbcsCodePage) {
if (UTF8IsAscii(leadByte)) {
// Single byte character or invalid
character = leadByte;
const int widthCharBytes = UTF8BytesOfLead(leadByte);
unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
for (int b = 1; b < widthCharBytes; b++) {
charBytes[b] = cb.UCharAt(position + b);
}
const int utf8status = UTF8ClassifyMulti(charBytes, widthCharBytes);
if (utf8status & UTF8MaskInvalid) {
// Report as singleton surrogate values which are invalid Unicode
character = 0xDC80 + character;
} else {
const int widthCharBytes = UTF8BytesOfLead(leadByte);
unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
for (int b = 1; b < widthCharBytes; b++) {
charBytes[b] = cb.UCharAt(position + b);
}
const int utf8status = UTF8ClassifyMulti(charBytes, widthCharBytes);
if (utf8status & UTF8MaskInvalid) {
// Report as singleton surrogate values which are invalid Unicode
character = 0xDC80 + leadByte;
} else {
bytesInCharacter = utf8status & UTF8MaskWidth;
character = UnicodeFromUTF8(charBytes);
}
bytesInCharacter = utf8status & UTF8MaskWidth;
character = UnicodeFromUTF8(charBytes);
}
} else {
if (IsDBCSLeadByteNoExcept(leadByte)) {
bytesInCharacter = 2;
character = (leadByte << 8) | cb.UCharAt(position + 1);
} else {
character = leadByte;
const unsigned char trailByte = cb.UCharAt(position + 1);
if (!IsDBCSTrailByteInvalid(trailByte)) {
bytesInCharacter = 2;
character = (character << 8) | trailByte;
}
}
}
} else {
character = leadByte;
}
if (pWidth) {
*pWidth = bytesInCharacter;
Expand Down

0 comments on commit 3e7741f

Please sign in to comment.