From da073686dcfc0fbc06f4e89167d9f003284a0d05 Mon Sep 17 00:00:00 2001
From: Rainer Kottenhoff
Date: Sat, 5 Aug 2017 20:20:22 +0200
Subject: [PATCH] reverting, and fixing encoding / code-page issues: -
consistent encoding <> code-page handling (including Scintilla's code-page
settings) - Scintilla issue regarding notepad2-mod issue #173 (see
https://github.com/XhmikosR/notepad2-mod/pull/193) - allow arbitrary
conversion between encodings (even if it does not make sense in every case)
(instead of silently doing nothing except changing the encoding info on the status bar)
---
scintilla/doc/ScintillaDoc.html | 3 +-
scintilla/doc/ScintillaHistory.html | 4 +-
scintilla/win32/ScintillaWin.cxx | 3 +-
src/Edit.c | 203 +++++++++++++++-------------
src/Edit.h | 10 +-
src/Helpers.c | 15 --
src/Helpers.h | 2 +-
src/Notepad3.c | 65 ++++-----
src/Notepad3.rc | 6 +-
src/Styles.c | 5 +-
10 files changed, 160 insertions(+), 156 deletions(-)
diff --git a/scintilla/doc/ScintillaDoc.html b/scintilla/doc/ScintillaDoc.html
index a7e7eba36..a588e513f 100644
--- a/scintilla/doc/ScintillaDoc.html
+++ b/scintilla/doc/ScintillaDoc.html
@@ -2897,7 +2897,8 @@ Style definition
number used by the C and C++ lexer to display literal strings; it has the value 6). This
feature works differently on Windows and GTK+.
The default character set is SC_CHARSET_DEFAULT
.
- SC_CHARSET_ANSI
and SC_CHARSET_DEFAULT
specify European Windows code page 1252 unless the code page is set.
+ SC_CHARSET_ANSI
specifies European Windows code page 1252.
+ SC_CHARSET_DEFAULT
specifies the system-default code page unless the code page is set.
diff --git a/scintilla/doc/ScintillaHistory.html b/scintilla/doc/ScintillaHistory.html
index 683a2c4a5..e6ea0f66e 100644
--- a/scintilla/doc/ScintillaHistory.html
+++ b/scintilla/doc/ScintillaHistory.html
@@ -1,4 +1,4 @@
-
+
@@ -1028,7 +1028,7 @@
C++11 range-based for loops used in SciTE so GCC 4.6 is now the minimum supported version.
- SC_CHARSET_DEFAULT now means code page 1252 on Windows unless a code page is set.
+ (REVERTED in Notepad2-mod) SC_CHARSET_DEFAULT now means code page 1252 on Windows unless a code page is set.
This prevents unexpected behaviour and crashes on East Asian systems where default locales are commonly DBCS.
Projects which want to default to DBCS code pages in East Asian locales should set the code page and
character set explicitly.
diff --git a/scintilla/win32/ScintillaWin.cxx b/scintilla/win32/ScintillaWin.cxx
index 81f288c72..0b47c36ed 100644
--- a/scintilla/win32/ScintillaWin.cxx
+++ b/scintilla/win32/ScintillaWin.cxx
@@ -1144,7 +1144,8 @@ UINT CodePageFromCharSet(DWORD characterSet, UINT documentCodePage) {
}
switch (characterSet) {
case SC_CHARSET_ANSI: return 1252;
- case SC_CHARSET_DEFAULT: return documentCodePage ? documentCodePage : 1252;
+ //case SC_CHARSET_DEFAULT: return documentCodePage ? documentCodePage : 1252; // SCI orig
+ case SC_CHARSET_DEFAULT: return documentCodePage;
case SC_CHARSET_BALTIC: return 1257;
case SC_CHARSET_CHINESEBIG5: return 950;
case SC_CHARSET_EASTEUROPE: return 1250;
diff --git a/src/Edit.c b/src/Edit.c
index 9088d25a6..0185a1bcd 100644
--- a/src/Edit.c
+++ b/src/Edit.c
@@ -57,9 +57,9 @@ extern BOOL bFixLineEndings;
extern BOOL bAutoStripBlanks;
-// Internal Codepage and Character Set
-extern int iInternalCodePage;
-extern int iInternalCharSet;
+// Default Codepage and Character Set
+extern int iDefaultCodePage;
+extern int iDefaultCharSet;
extern BOOL bSkipUnicodeDetection;
extern BOOL bLoadASCIIasUTF8;
extern BOOL bLoadNFOasOEM;
@@ -73,15 +73,15 @@ WCHAR wchANSI[8] = L"";
WCHAR wchOEM [8] = L"";
NP2ENCODING mEncoding[] = {
- { NCP_DEFAULT|NCP_RECODE, 0, "ansi,ansi,ascii,", 61000, L"" },
- { NCP_8BIT|NCP_RECODE, 0, "oem,oem,", 61001, L"" },
- { NCP_UNICODE|NCP_UNICODE_BOM, 0, "", 61002, L"" },
- { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM, 0, "", 61003, L"" },
- { NCP_UNICODE|NCP_RECODE, 0, "utf-16,utf16,unicode,", 61004, L"" },
- { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_RECODE, 0, "utf-16be,utf16be,unicodebe,", 61005, L"" },
- { NCP_UTF8|NCP_RECODE, CP_UTF8, "utf-8,utf8,", 61006, L"" },
- { NCP_UTF8|NCP_UTF8_SIGN, CP_UTF8, "utf-8,utf8,", 61007, L"" },
- { NCP_8BIT|NCP_RECODE, CP_UTF7, "utf-7,utf7,", 61008, L"" },
+ { NCP_ANSI|NCP_RECODE, CP_ACP, "ansi,ansi,ascii,", 61000, L"" },
+ { NCP_8BIT|NCP_RECODE, CP_OEMCP, "oem,oem,", 61001, L"" },
+ { NCP_UNICODE|NCP_UNICODE_BOM, CP_UTF8, "", 61002, L"" },
+ { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM, CP_UTF8, "", 61003, L"" },
+ { NCP_UNICODE|NCP_RECODE, CP_UTF8, "utf-16,utf16,unicode,", 61004, L"" },
+ { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_RECODE, CP_UTF8, "utf-16be,utf16be,unicodebe,", 61005, L"" },
+ { NCP_UTF8|NCP_RECODE, CP_UTF8, "utf-8,utf8,", 61006, L"" },
+ { NCP_UTF8|NCP_UTF8_SIGN, CP_UTF8, "utf-8,utf8,", 61007, L"" },
+ { NCP_8BIT|NCP_RECODE, CP_UTF7, "utf-7,utf7,", 61008, L"" },
{ NCP_8BIT|NCP_RECODE, 720, "DOS-720,dos720,", 61009, L"" },
{ NCP_8BIT|NCP_RECODE, 28596, "iso-8859-6,iso88596,arabic,csisolatinarabic,ecma114,isoir127,", 61010, L"" },
{ NCP_8BIT|NCP_RECODE, 10004, "x-mac-arabic,xmacarabic,", 61011, L"" },
@@ -238,7 +238,7 @@ HWND EditCreate(HWND hwndParent)
g_hInstance,
NULL);
- SetInternalCodePage(hwnd,iInternalCodePage);
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iDefaultEncoding),0);
SendMessage(hwnd,SCI_SETEOLMODE,SC_EOL_CRLF,0);
SendMessage(hwnd,SCI_SETPASTECONVERTENDINGS,1,0);
SendMessage(hwnd,SCI_SETMODEVENTMASK,/*SC_MODEVENTMASKALL*/SC_MOD_INSERTTEXT|SC_MOD_DELETETEXT,0);
@@ -325,6 +325,9 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint)
if (encSource == encDest)
return(TRUE);
+ if (!(Encoding_IsValid(encSource) && Encoding_IsValid(encDest)))
+ return(FALSE);
+
length = (int)SendMessage(hwnd,SCI_GETLENGTH,0,0);
if (length == 0) {
@@ -333,7 +336,7 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint)
SendMessage(hwnd,SCI_EMPTYUNDOBUFFER,0,0);
SendMessage(hwnd,SCI_CLEARALL,0,0);
SendMessage(hwnd,SCI_MARKERDELETEALL,(WPARAM)-1,0);
- SetInternalCodePage(hwnd, encDest);
+ SendMessage(hwnd,SCI_SETCODEPAGE, Encoding_GetSciCodePage(encDest),0);
SendMessage(hwnd,SCI_SETUNDOCOLLECTION,1,0);
SendMessage(hwnd,EM_EMPTYUNDOBUFFER,0,0);
SendMessage(hwnd,SCI_GOTOPOS,0,0);
@@ -344,6 +347,7 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint)
}
else {
+
pchText = GlobalAlloc(GPTR,length*5+2);
tr.lpstrText = pchText;
@@ -351,17 +355,17 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint)
pwchText = GlobalAlloc(GPTR,length*3+2);
- UINT cpSrc = (encSource == CPI_ANSI) ? CP_ACP : ((encSource == CPI_OEM) ? CP_OEMCP : ((encSource == CPI_UTF7) ? CP_UTF7 : CP_UTF8));
- UINT cpDst = (encDest == CPI_ANSI) ? CP_ACP : ((encDest == CPI_OEM) ? CP_OEMCP : ((encDest == CPI_UTF7) ? CP_UTF7 : CP_UTF8));
- cbwText = MultiByteToWideChar(cpSrc,0,pchText,length,pwchText,length*3+2);
- cbText = WideCharToMultiByte(cpDst,0,pwchText,cbwText,pchText,length*5+2,NULL,NULL);
+ UINT cpSrc = mEncoding[encSource].uCodePage;
+ UINT cpDst = mEncoding[encDest].uCodePage;
+ cbwText = MultiByteToWideChar(cpSrc,0,pchText,length,pwchText,length*3+2);
+ cbText = WideCharToMultiByte(cpDst,0,pwchText,cbwText,pchText,length*5+2,NULL,NULL);
SendMessage(hwnd,SCI_CANCEL,0,0);
SendMessage(hwnd,SCI_SETUNDOCOLLECTION,0,0);
SendMessage(hwnd,SCI_EMPTYUNDOBUFFER,0,0);
SendMessage(hwnd,SCI_CLEARALL,0,0);
SendMessage(hwnd,SCI_MARKERDELETEALL,(WPARAM)-1,0);
- SetInternalCodePage(hwnd, encDest);
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(encDest),0);
SendMessage(hwnd,SCI_ADDTEXT,cbText,(LPARAM)pchText);
SendMessage(hwnd,SCI_EMPTYUNDOBUFFER,0,0);
SendMessage(hwnd,SCI_SETUNDOCOLLECTION,1,0);
@@ -383,43 +387,39 @@ BOOL EditSetNewEncoding(HWND hwnd,int iCurrentEncoding,int iNewEncoding,BOOL bNo
if (iCurrentEncoding != iNewEncoding) {
- if (iCurrentEncoding != CPI_ANSI && iNewEncoding != CPI_ANSI)
- return(TRUE); // can only convert from-and-to ANSI
-
- if (SendMessage(hwnd,SCI_GETLENGTH,0,0) == 0) {
-
- BOOL bIsEmptyUndoHistory = (SendMessage(hwnd,SCI_CANUNDO,0,0) == 0 && SendMessage(hwnd,SCI_CANREDO,0,0) == 0);
+ BOOL bOneEncodingIsANSI = (Encoding_IsANSI(iCurrentEncoding) || Encoding_IsANSI(iNewEncoding));
+ BOOL bBothEncodingsAreANSI = (Encoding_IsANSI(iCurrentEncoding) && Encoding_IsANSI(iNewEncoding));
+
+ // conversion between arbitrary encodings may lead to unexpected results
+ if (!bOneEncodingIsANSI || bBothEncodingsAreANSI) {
+ // ~ return(TRUE); // this would imply a successful conversion - it is not !
+ // return(FALSE); // intentionally commented out: allow conversion between arbitrary encodings
+ }
+
+ if (SendMessage(hwnd, SCI_GETLENGTH, 0, 0) == 0) {
- if ((iCurrentEncoding == CPI_ANSI || iNewEncoding == CPI_ANSI) &&
- (bNoUI || bIsEmptyUndoHistory || InfoBox(MBYESNO,L"MsgConv2",IDS_ASK_ENCODING2) == IDYES)) {
+ BOOL bIsEmptyUndoHistory = (SendMessage(hwnd, SCI_CANUNDO, 0, 0) == 0 && SendMessage(hwnd, SCI_CANREDO, 0, 0) == 0);
- EditConvertText(hwnd, iCurrentEncoding, iNewEncoding, bSetSavePoint);
+ BOOL doNewEncoding = (!bIsEmptyUndoHistory && !bNoUI) ?
+ (InfoBox(MBYESNO, L"MsgConv2", IDS_ASK_ENCODING2) == IDYES) : TRUE;
- return(TRUE);
+ if (doNewEncoding) {
+ return EditConvertText(hwnd,iCurrentEncoding,iNewEncoding,bSetSavePoint);
}
-
- else
- return(FALSE);
}
-
- else if ((iCurrentEncoding == CPI_ANSI || iNewEncoding == CPI_ANSI) &&
- (bNoUI || InfoBox(MBYESNO,L"MsgConv1",IDS_ASK_ENCODING) == IDYES)) {
-
- BeginWaitCursor();
-
- EditConvertText(hwnd, iCurrentEncoding, iNewEncoding, FALSE);
-
- EndWaitCursor();
-
- return(TRUE);
+ else {
+
+ BOOL doNewEncoding = (!bNoUI) ? (InfoBox(MBYESNO, L"MsgConv1", IDS_ASK_ENCODING) == IDYES) : TRUE;
+
+ if (doNewEncoding) {
+ BeginWaitCursor();
+ BOOL result = EditConvertText(hwnd,iCurrentEncoding,iNewEncoding,FALSE);
+ EndWaitCursor();
+ return(result);
+ }
}
-
- else
- return(FALSE);
- }
-
- else
- return(FALSE);
+ }
+ return(FALSE);
}
@@ -637,7 +637,7 @@ int Encoding_MapIniSetting(BOOL bLoad,int iSetting) {
if (bLoad) {
switch (iSetting) {
case -1: return CPI_NONE;
- case 0: return CPI_ANSI;
+ case 0: return CPI_ANSI_DEFAULT;
case 1: return CPI_UNICODEBOM;
case 2: return CPI_UNICODEBEBOM;
case 3: return CPI_UTF8;
@@ -651,14 +651,14 @@ int Encoding_MapIniSetting(BOOL bLoad,int iSetting) {
if ((mEncoding[i].uCodePage == (UINT)iSetting) && Encoding_IsValid(i))
return(i);
}
- return CPI_UTF8; // not found
+ return CPI_ANSI_DEFAULT;
}
}
}
else {
switch (iSetting) {
case CPI_NONE: return -1;
- case CPI_ANSI: return 0;
+ case CPI_ANSI_DEFAULT: return 0;
case CPI_UNICODEBOM: return 1;
case CPI_UNICODEBEBOM: return 2;
case CPI_UTF8: return 3;
@@ -667,7 +667,12 @@ int Encoding_MapIniSetting(BOOL bLoad,int iSetting) {
case CPI_UNICODE: return 6;
case CPI_UNICODEBE: return 7;
case CPI_UTF7: return 8;
- default: return(mEncoding[iSetting].uCodePage);
+ default: {
+ if (Encoding_IsValid(iSetting))
+ return(mEncoding[iSetting].uCodePage);
+ else
+ return CPI_ANSI_DEFAULT;
+ }
}
}
}
@@ -783,7 +788,7 @@ void Encoding_AddToListView(HWND hwnd,int idSel,BOOL bRecodeOnly)
else
StrCpyN(wchBuf,pEE[i].wch,COUNTOF(wchBuf));
- if (id == CPI_ANSI)
+ if (Encoding_IsANSI(id))
StrCatN(wchBuf,wchANSI,COUNTOF(wchBuf));
else if (id == CPI_OEM)
StrCatN(wchBuf,wchOEM,COUNTOF(wchBuf));
@@ -875,7 +880,7 @@ void Encoding_AddToComboboxEx(HWND hwnd,int idSel,BOOL bRecodeOnly)
else
StrCpyN(wchBuf,pEE[i].wch,COUNTOF(wchBuf));
- if (id == CPI_ANSI)
+ if (Encoding_IsANSI(id))
StrCatN(wchBuf,wchANSI,COUNTOF(wchBuf));
else if (id == CPI_OEM)
StrCatN(wchBuf,wchOEM,COUNTOF(wchBuf));
@@ -921,6 +926,25 @@ BOOL Encoding_GetFromComboboxEx(HWND hwnd,int *pidEncoding)
}
+BOOL Encoding_IsDefault(int iEncoding)
+{
+ return (mEncoding[iEncoding].uFlags & NCP_DEFAULT);
+}
+
+BOOL Encoding_IsANSI(int iEncoding)
+{
+ return (mEncoding[iEncoding].uFlags & NCP_ANSI);
+}
+
+int Encoding_GetSciCodePage(int iEncoding)
+{
+ if (Encoding_IsDefault(iEncoding))
+ return iDefaultCodePage;
+
+ return (Encoding_IsANSI(iEncoding)) ? 0 : SC_CP_UTF8;
+}
+
+
BOOL IsUnicode(const char* pBuffer,int cb,LPBOOL lpbBOM,LPBOOL lpbReverse)
{
int i = 0xFFFF;
@@ -1185,13 +1209,14 @@ BOOL EditLoadFile(
char* lpData;
DWORD cbData;
//char *cp;
- int _iDefaultEncoding;
+ int _iPrefEncoding;
BOOL bBOM;
BOOL bReverse = FALSE;
BOOL bPreferOEM = FALSE;
+ *iEncoding = CPI_ANSI_DEFAULT;
*pbUnicodeErr = FALSE;
*pbFileTooBig = FALSE;
@@ -1205,8 +1230,8 @@ BOOL EditLoadFile(
dwLastIOError = GetLastError();
if (hFile == INVALID_HANDLE_VALUE) {
- iSrcEncoding = -1;
- iWeakSrcEncoding = -1;
+ iSrcEncoding = CPI_NONE;
+ iWeakSrcEncoding = CPI_NONE;
return FALSE;
}
@@ -1220,8 +1245,8 @@ BOOL EditLoadFile(
if (InfoBox(MBYESNO,L"MsgFileSizeWarning",IDS_WARNLOADBIGFILE) != IDYES) {
CloseHandle(hFile);
*pbFileTooBig = TRUE;
- iSrcEncoding = -1;
- iWeakSrcEncoding = -1;
+ iSrcEncoding = CPI_NONE;
+ iWeakSrcEncoding = CPI_NONE;
return FALSE;
}
}
@@ -1233,8 +1258,8 @@ BOOL EditLoadFile(
if (!bReadSuccess) {
GlobalFree(lpData);
- iSrcEncoding = -1;
- iWeakSrcEncoding = -1;
+ iSrcEncoding = CPI_NONE;
+ iWeakSrcEncoding = CPI_NONE;
return FALSE;
}
@@ -1245,34 +1270,29 @@ BOOL EditLoadFile(
bPreferOEM = TRUE;
}
- if (!Encoding_IsValid(iDefaultEncoding))
- iDefaultEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI;
-
- _iDefaultEncoding = (bPreferOEM) ? g_DOSEncoding : iDefaultEncoding;
- if (iWeakSrcEncoding != -1 && Encoding_IsValid(iWeakSrcEncoding))
- _iDefaultEncoding = iWeakSrcEncoding;
-
- *iEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI;
+ _iPrefEncoding = (bPreferOEM) ? g_DOSEncoding : iDefaultEncoding;
+ if (Encoding_IsValid(iWeakSrcEncoding))
+ _iPrefEncoding = iWeakSrcEncoding;
if (cbData == 0) {
FileVars_Init(NULL,0,&fvCurFile);
*iEOLMode = iLineEndings[iDefaultEOLMode];
- if (iSrcEncoding == -1) {
+ if (iSrcEncoding == CPI_NONE) {
if (bLoadASCIIasUTF8 && !bPreferOEM)
*iEncoding = CPI_UTF8;
else
- *iEncoding = _iDefaultEncoding;
+ *iEncoding = _iPrefEncoding;
}
else
*iEncoding = iSrcEncoding;
- SetInternalCodePage(hwnd, *iEncoding);
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0);
EditSetNewText(hwnd,"",0);
SendMessage(hwnd,SCI_SETEOLMODE,iLineEndings[iDefaultEOLMode],0);
GlobalFree(lpData);
}
- else if (!bSkipEncodingDetection && (iSrcEncoding == -1 || iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE) &&
+ else if (!bSkipEncodingDetection && (iSrcEncoding == CPI_NONE || iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE) &&
(iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE || IsUnicode(lpData,cbData,&bBOM,&bReverse)) &&
(iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE || !IsUTF8Signature(lpData))) // check for UTF-8 signature
{
@@ -1310,7 +1330,7 @@ BOOL EditLoadFile(
}
GlobalFree(lpData);
- SetInternalCodePage(hwnd, SC_CP_UTF8);
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0);
EditSetNewText(hwnd,"",0);
FileVars_Init(lpDataUTF8,cbData-1,&fvCurFile);
EditSetNewText(hwnd,lpDataUTF8,cbData-1);
@@ -1320,7 +1340,7 @@ BOOL EditLoadFile(
else {
FileVars_Init(lpData,cbData,&fvCurFile);
- if (!bSkipEncodingDetection && (iSrcEncoding == -1 || iSrcEncoding == CPI_UTF8 || iSrcEncoding == CPI_UTF8SIGN) &&
+ if (!bSkipEncodingDetection && (iSrcEncoding == CPI_NONE || iSrcEncoding == CPI_UTF8 || iSrcEncoding == CPI_UTF8SIGN) &&
((IsUTF8Signature(lpData) ||
FileVars_IsUTF8(&fvCurFile) ||
(iSrcEncoding == CPI_UTF8 || iSrcEncoding == CPI_UTF8SIGN) ||
@@ -1328,11 +1348,11 @@ BOOL EditLoadFile(
(((UTF8_mbslen_bytes(UTF8StringStart(lpData)) - 1 !=
UTF8_mbslen(UTF8StringStart(lpData),IsUTF8Signature(lpData) ? cbData-3 : cbData)) ||
(!bPreferOEM && (
- mEncoding[_iDefaultEncoding].uFlags & NCP_UTF8 ||
+ mEncoding[_iPrefEncoding].uFlags & NCP_UTF8 ||
bLoadASCIIasUTF8 )) ))))) && !(FileVars_IsNonUTF8(&fvCurFile) &&
(iSrcEncoding != CPI_UTF8 && iSrcEncoding != CPI_UTF8SIGN)))
{
- SetInternalCodePage(hwnd, SC_CP_UTF8);
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(CPI_UTF8),0);
EditSetNewText(hwnd,"",0);
if (IsUTF8Signature(lpData)) {
EditSetNewText(hwnd,UTF8StringStart(lpData),cbData-3);
@@ -1353,20 +1373,20 @@ BOOL EditLoadFile(
LPWSTR lpDataWide;
int cbDataWide;
- if (iSrcEncoding != -1)
+ if (iSrcEncoding != CPI_NONE)
*iEncoding = iSrcEncoding;
else {
*iEncoding = FileVars_GetEncoding(&fvCurFile);
- if (*iEncoding == -1) {
+ if (*iEncoding == CPI_NONE) {
if (fvCurFile.mask & FV_ENCODING)
- *iEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI;
+ *iEncoding = CPI_ANSI_DEFAULT;
else {
- if (iWeakSrcEncoding == -1)
- *iEncoding = _iDefaultEncoding;
+ if (iWeakSrcEncoding == CPI_NONE)
+ *iEncoding = _iPrefEncoding;
else if (mEncoding[iWeakSrcEncoding].uFlags & NCP_INTERNAL)
*iEncoding = iDefaultEncoding;
else
- *iEncoding = _iDefaultEncoding;
+ *iEncoding = _iPrefEncoding;
}
}
}
@@ -1385,18 +1405,16 @@ BOOL EditLoadFile(
cbData = WideCharToMultiByte(CP_UTF8,0,lpDataWide,cbDataWide,lpData,(int)GlobalSize(lpData),NULL,NULL);
GlobalFree(lpDataWide);
- SetInternalCodePage(hwnd, SC_CP_UTF8);
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0);
EditSetNewText(hwnd,"",0);
EditSetNewText(hwnd,lpData,cbData);
*iEOLMode = EditDetectEOLMode(hwnd,lpData,cbData);
GlobalFree(lpData);
}
-
else {
-
- *iEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI;
- SetInternalCodePage(hwnd, *iEncoding);
+ *iEncoding = iDefaultEncoding;
+ SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0);
EditSetNewText(hwnd,"",0);
EditSetNewText(hwnd,lpData,cbData);
*iEOLMode = EditDetectEOLMode(hwnd,lpData,cbData);
@@ -1473,11 +1491,6 @@ BOOL EditSaveFile(
if (bAutoStripBlanks)
EditStripTrailingBlanks(hwnd,TRUE);
- // convert to ANSI, if CP is set to UTF8
- if ((iEncoding == CPI_ANSI) && (iInternalCodePage == SC_CP_UTF8)) {
- EditSetNewEncoding(hwnd, CPI_UTF8, CPI_ANSI, FALSE, TRUE);
- }
-
// get text
cbData = (int)SendMessage(hwnd,SCI_GETLENGTH,0,0);
lpData = GlobalAlloc(GPTR, cbData + 4); //fix: +bom
diff --git a/src/Edit.h b/src/Edit.h
index 0ace8af70..4625a7297 100644
--- a/src/Edit.h
+++ b/src/Edit.h
@@ -137,10 +137,11 @@ extern int g_DOSEncoding;
#define NCP_UNICODE_REVERSE 16
#define NCP_UNICODE_BOM 32
#define NCP_8BIT 64
-#define NCP_INTERNAL (NCP_DEFAULT|NCP_UTF8|NCP_UTF8_SIGN|NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM)
-#define NCP_RECODE 128
+#define NCP_ANSI 128
+#define NCP_INTERNAL (NCP_DEFAULT|NCP_UTF8|NCP_UTF8_SIGN|NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM|NCP_ANSI)
+#define NCP_RECODE 256
#define CPI_NONE -1
-#define CPI_ANSI 0
+#define CPI_ANSI_DEFAULT 0
#define CPI_OEM 1
#define CPI_UNICODEBOM 2
#define CPI_UNICODEBEBOM 3
@@ -171,6 +172,9 @@ void Encoding_AddToListView(HWND,int,BOOL);
BOOL Encoding_GetFromListView(HWND,int *);
void Encoding_AddToComboboxEx(HWND,int,BOOL);
BOOL Encoding_GetFromComboboxEx(HWND,int *);
+BOOL Encoding_IsDefault(int);
+BOOL Encoding_IsANSI(int);
+int Encoding_GetSciCodePage(int);
BOOL IsUnicode(const char*,int,LPBOOL,LPBOOL);
BOOL IsUTF8(const char*,int);
diff --git a/src/Helpers.c b/src/Helpers.c
index 2754a459a..2c2deb935 100644
--- a/src/Helpers.c
+++ b/src/Helpers.c
@@ -27,11 +27,6 @@
#include
#include "helpers.h"
#include "resource.h"
-#include "Edit.h"
-
-
-//=============================================================================
-extern iInternalCodePage;
//=============================================================================
@@ -496,16 +491,6 @@ void SetWindowTransparentMode(HWND hwnd,BOOL bTransparentMode)
}
-//=============================================================================
-//
-// SetInternalCodePage()
-//
-void SetInternalCodePage(HWND hwnd, int encoding) {
- iInternalCodePage = (encoding == CPI_ANSI) ? 0 : SC_CP_UTF8;
- SendMessage(hwnd, SCI_SETCODEPAGE, iInternalCodePage, 0);
-}
-
-
//=============================================================================
//
// CenterDlgInParent()
diff --git a/src/Helpers.h b/src/Helpers.h
index 8c1356b79..d09b22a25 100644
--- a/src/Helpers.h
+++ b/src/Helpers.h
@@ -83,7 +83,7 @@ BOOL IsFontAvailable(LPCWSTR);
BOOL SetWindowTitle(HWND,UINT,BOOL,UINT,LPCWSTR,int,BOOL,UINT,BOOL,LPCWSTR);
void SetWindowTransparentMode(HWND,BOOL);
-void SetInternalCodePage(HWND,int);
+
void CenterDlgInParent(HWND);
void GetDlgPos(HWND,LPINT,LPINT);
diff --git a/src/Notepad3.c b/src/Notepad3.c
index 2a96df52a..1f2bec603 100644
--- a/src/Notepad3.c
+++ b/src/Notepad3.c
@@ -222,8 +222,8 @@ int iEncoding;
int iOriginalEncoding;
int iEOLMode;
-int iInternalCodePage;
-int iInternalCharSet;
+int iDefaultCodePage;
+int iDefaultCharSet;
int iInitialLine;
int iInitialColumn;
@@ -919,7 +919,7 @@ HWND InitInstance(HINSTANCE hInstance,LPSTR pszCmdLine,int nCmdShow)
if (iSrcEncoding != -1) {
iEncoding = iSrcEncoding;
iOriginalEncoding = iSrcEncoding;
- SetInternalCodePage(hwndEdit, iEncoding);
+ SendMessage(hwndEdit,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iEncoding),0);
}
}
@@ -2097,7 +2097,7 @@ void MsgInitMenu(HWND hwnd,WPARAM wParam,LPARAM lParam)
i = IDM_ENCODING_UTF8SIGN;
else if (mEncoding[iEncoding].uFlags & NCP_UTF8)
i = IDM_ENCODING_UTF8;
- else if (mEncoding[iEncoding].uFlags & NCP_DEFAULT)
+ else if (mEncoding[iEncoding].uFlags & NCP_ANSI)
i = IDM_ENCODING_ANSI;
else
i = -1;
@@ -2750,7 +2750,7 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam)
case IDM_ENCODING_UNICODEREV: iNewEncoding = CPI_UNICODEBEBOM; break;
case IDM_ENCODING_UTF8: iNewEncoding = CPI_UTF8; break;
case IDM_ENCODING_UTF8SIGN: iNewEncoding = CPI_UTF8SIGN; break;
- case IDM_ENCODING_ANSI: iNewEncoding = CPI_ANSI; break;
+ case IDM_ENCODING_ANSI: iNewEncoding = CPI_ANSI_DEFAULT; break;
}
}
@@ -2763,7 +2763,7 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam)
iOriginalEncoding = iNewEncoding;
}
else {
- if (iEncoding == CPI_ANSI || iNewEncoding == CPI_ANSI)
+ if (Encoding_IsANSI(iEncoding) || Encoding_IsANSI(iNewEncoding))
iOriginalEncoding = CPI_NONE;
iEncoding = iNewEncoding;
}
@@ -2785,21 +2785,21 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam)
WCHAR tchCurFile2[MAX_PATH];
- int iNewEncoding = CPI_NONE;
- if (iEncoding != CPI_ANSI)
- iNewEncoding = iEncoding;
+ // file to ANSI is default loading behaviour, recoding does not make sense
+ int iNewEncoding = Encoding_IsANSI(iEncoding) ? CPI_NONE : iEncoding;
+
if (iEncoding == CPI_UTF8SIGN)
iNewEncoding = CPI_UTF8;
- if (iEncoding == CPI_UNICODEBOM)
+ else if (iEncoding == CPI_UNICODEBOM)
iNewEncoding = CPI_UNICODE;
- if (iEncoding == CPI_UNICODEBEBOM)
+ else if (iEncoding == CPI_UNICODEBEBOM)
iNewEncoding = CPI_UNICODEBE;
if ((bModified || iEncoding != iOriginalEncoding) && MsgBox(MBOKCANCEL,IDS_ASK_RECODE) != IDOK)
return(0);
- if (RecodeDlg(hwnd,&iNewEncoding)) {
-
+ if (RecodeDlg(hwnd,&iNewEncoding))
+ {
lstrcpy(tchCurFile2,szCurFile);
iSrcEncoding = iNewEncoding;
FileLoad(TRUE,FALSE,TRUE,FALSE,tchCurFile2);
@@ -4517,7 +4517,7 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam)
{
WCHAR tchCurFile2[MAX_PATH];
if (lstrlen(szCurFile)) {
- iSrcEncoding = CPI_ANSI;
+ iSrcEncoding = CPI_ANSI_DEFAULT;
lstrcpy(tchCurFile2,szCurFile);
FileLoad(FALSE,FALSE,TRUE,FALSE,tchCurFile2);
}
@@ -5685,21 +5685,22 @@ void LoadSettings()
bViewEOLs = IniSectionGetInt(pIniSection,L"ViewEOLs",0);
if (bViewEOLs) bViewEOLs = 1;
- // prefered default encoding is UTF-8 (used for internal codepage too: SC_CP_UTF8)
- iDefaultEncoding = IniSectionGetInt(pIniSection,L"DefaultEncoding", CPI_NONE);
+ iDefaultEncoding = IniSectionGetInt(pIniSection,L"DefaultEncoding", CPI_ANSI_DEFAULT);
+ // if DefaultEncoding is defined as CPI_NONE(-1) explicitly, set to system's current code-page
iDefaultEncoding = (iDefaultEncoding == CPI_NONE) ?
Encoding_MapIniSetting(TRUE, (int)GetACP()) :
Encoding_MapIniSetting(TRUE, iDefaultEncoding);
- if (!Encoding_IsValid(iDefaultEncoding)) iDefaultEncoding = CPI_UTF8;
-
- bLoadASCIIasUTF8 = IniSectionGetInt(pIniSection,L"LoadASCIIasUTF8",0);
- if (bLoadASCIIasUTF8) bLoadASCIIasUTF8 = 1;
- // re-adjust ANSI encoding
- iDefaultEncoding = ((iDefaultEncoding == CPI_ANSI) && (bLoadASCIIasUTF8 == 1)) ? CPI_UTF8 : iDefaultEncoding;
+ if (!Encoding_IsValid(iDefaultEncoding))
+ iDefaultEncoding = CPI_ANSI_DEFAULT;
+ // set flag for encoding default
+ mEncoding[iDefaultEncoding].uFlags |= NCP_DEFAULT;
bSkipUnicodeDetection = IniSectionGetInt(pIniSection, L"SkipUnicodeDetection", 0);
if (bSkipUnicodeDetection) bSkipUnicodeDetection = 1;
+ bLoadASCIIasUTF8 = IniSectionGetInt(pIniSection, L"LoadASCIIasUTF8", 0);
+ if (bLoadASCIIasUTF8) bLoadASCIIasUTF8 = 1;
+
bLoadNFOasOEM = IniSectionGetInt(pIniSection,L"LoadNFOasOEM",1);
if (bLoadNFOasOEM) bLoadNFOasOEM = 1;
@@ -5858,22 +5859,22 @@ void LoadSettings()
LocalFree(pIniSection);
- // define scintilla internal code page
- iInternalCodePage = (iDefaultEncoding == CPI_ANSI) ? 0 : SC_CP_UTF8;
-
+ // define scintilla internal code page, don't use Encoding_GetSciCodePage(iDefaultEncoding) here
+ iDefaultCodePage = (iDefaultEncoding == CPI_ANSI_DEFAULT) ? 0 : SC_CP_UTF8;
{
// check for Chinese, Japan, Korean DBCS code pages and switch accordingly
int acp = (int)GetACP();
if (acp == 932 || acp == 936 || acp == 949 || acp == 950 || acp == 1361)
- iInternalCodePage = acp;
+ iDefaultCodePage = acp;
}
+
{
CHARSETINFO ci;
- if (TranslateCharsetInfo((DWORD*)(UINT_PTR)iInternalCodePage, &ci, TCI_SRCCODEPAGE))
- iInternalCharSet = ci.ciCharset;
+ if (TranslateCharsetInfo((DWORD*)(UINT_PTR)iDefaultCodePage, &ci, TCI_SRCCODEPAGE))
+ iDefaultCharSet = ci.ciCharset;
else
- iInternalCharSet = ANSI_CHARSET;
+ iDefaultCharSet = ANSI_CHARSET;
}
// Scintilla Styles
@@ -5949,8 +5950,8 @@ void SaveSettings(BOOL bSaveSettingsNow)
IniSectionSetInt(pIniSection,L"ViewWhiteSpace",bViewWhiteSpace);
IniSectionSetInt(pIniSection,L"ViewEOLs",bViewEOLs);
IniSectionSetInt(pIniSection,L"DefaultEncoding",Encoding_MapIniSetting(FALSE,iDefaultEncoding));
- IniSectionSetInt(pIniSection,L"LoadASCIIasUTF8",bLoadASCIIasUTF8);
IniSectionSetInt(pIniSection,L"SkipUnicodeDetection",bSkipUnicodeDetection);
+ IniSectionSetInt(pIniSection,L"LoadASCIIasUTF8",bLoadASCIIasUTF8);
IniSectionSetInt(pIniSection,L"LoadNFOasOEM",bLoadNFOasOEM);
IniSectionSetInt(pIniSection,L"NoEncodingTags",bNoEncodingTags);
IniSectionSetInt(pIniSection,L"DefaultEOLMode",iDefaultEOLMode);
@@ -6950,7 +6951,7 @@ BOOL FileLoad(BOOL bDontSave,BOOL bNew,BOOL bReload,BOOL bNoEncDetect,LPCWSTR lp
SendMessage(hwndEdit,SCI_SETEOLMODE,iLineEndings[iDefaultEOLMode],0);
iEncoding = iDefaultEncoding;
iOriginalEncoding = iDefaultEncoding;
- SetInternalCodePage(hwndEdit, iEncoding);
+ SendMessage(hwndEdit,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iDefaultEncoding),0);
EditSetNewText(hwndEdit,"",0);
SetWindowTitle(hwndMain,uidsAppTitle,fIsElevated,IDS_UNTITLED,szCurFile,
iPathNameFormat,bModified || iEncoding != iOriginalEncoding,
@@ -7015,7 +7016,7 @@ BOOL FileLoad(BOOL bDontSave,BOOL bNew,BOOL bReload,BOOL bNoEncDetect,LPCWSTR lp
iEncoding = iDefaultEncoding;
iOriginalEncoding = iDefaultEncoding;
}
- SetInternalCodePage(hwndEdit, iEncoding);
+ SendMessage(hwndEdit,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iEncoding),0);
bReadOnly = FALSE;
EditSetNewText(hwndEdit,"",0);
}
diff --git a/src/Notepad3.rc b/src/Notepad3.rc
index e363faec1..815017b32 100644
--- a/src/Notepad3.rc
+++ b/src/Notepad3.rc
@@ -507,7 +507,7 @@ BEGIN
VK_F8, IDM_ENCODING_RECODE, VIRTKEY, NOINVERT
VK_F8, IDM_EDIT_INSERT_ENCODING, VIRTKEY, CONTROL, NOINVERT
VK_F8, CMD_RELOADNOFILEVARS, VIRTKEY, ALT, NOINVERT
- VK_F8, CMD_RELOADASCIIASUTF8, VIRTKEY, SHIFT, NOINVERT
+ VK_F8, IDM_ENCODING_UTF8, VIRTKEY, SHIFT, NOINVERT
VK_F9, IDM_ENCODING_SELECT, VIRTKEY, NOINVERT
VK_F9, IDM_EDIT_INSERT_FILENAME, VIRTKEY, CONTROL, NOINVERT
VK_F9, IDM_FILE_MANAGEFAV, VIRTKEY, ALT, NOINVERT
@@ -1421,8 +1421,8 @@ BEGIN
IDS_FIND_WRAPRE "Reached the beginning of the document, restarting search at the end."
IDS_NOTFOUND "The specified text was not found."
IDS_REPLCOUNT "%i occurrences of the specified text have been replaced."
- IDS_ASK_ENCODING "Switching the file encoding from ANSI to non-ANSI (and vice versa) may replace unsupported text with default characters, and the undo history will be cleared. Continue?"
- IDS_ASK_ENCODING2 "You are about to change the encoding of an empty file from ANSI to non-ANSI. Note that this will clear the undo history, as it can't be synchronized with the new encoding. Continue?"
+ IDS_ASK_ENCODING "Switching the file encoding from one encoding to another may replace unsupported text with default characters, and the undo history will be cleared. Continue?"
+ IDS_ASK_ENCODING2 "You are about to change the encoding of an empty file. Note that this will clear the undo history, as it can't be synchronized with the new encoding. Continue?"
IDS_ERR_ENCODINGNA "Code page conversion tables for the selected encoding are not available on your system."
IDS_ERR_UNICODE "Error converting this Unicode file.\nData will be lost if the file is saved!"
END
diff --git a/src/Styles.c b/src/Styles.c
index ecf935f2a..0c60d1b15 100644
--- a/src/Styles.c
+++ b/src/Styles.c
@@ -70,7 +70,6 @@ EDITLEXER lexDefault = { SCLEX_NULL, 63000, L"Default Text", L"txt; text; wtx;
/* 23 */ { SCI_SETEXTRAASCENT+SCI_SETEXTRADESCENT, 63123, L"2nd Extra Line Spacing (Size)", L"", L"" },
{ -1, 00000, L"", L"", L"" } } };
-
KEYWORDLIST KeyWords_HTML = {
"!doctype ^aria- ^data- a abbr accept accept-charset accesskey acronym action address align alink "
"alt and applet archive area article aside async audio autocomplete autofocus autoplay axis b "
@@ -2656,7 +2655,7 @@ int iDefaultLexer;
BOOL bAutoSelect;
int cxStyleSelectDlg;
int cyStyleSelectDlg;
-extern int iInternalCharSet;
+extern int iDefaultCharSet;
extern BOOL bHiliteCurrentLine;
@@ -4004,7 +4003,7 @@ BOOL Style_SelectFont(HWND hwnd,LPWSTR lpszStyle,int cchStyle,BOOL bDefaultStyle
if (bDefaultStyle &&
lf.lfCharSet != DEFAULT_CHARSET &&
lf.lfCharSet != ANSI_CHARSET &&
- lf.lfCharSet != iInternalCharSet) {
+ lf.lfCharSet != iDefaultCharSet) {
lstrcat(szNewStyle,L"; charset:");
wsprintf(tch,L"%i",lf.lfCharSet);
lstrcat(szNewStyle,tch);