From da073686dcfc0fbc06f4e89167d9f003284a0d05 Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Sat, 5 Aug 2017 20:20:22 +0200 Subject: [PATCH] reverting, and fixing encoding / code-page issues: - consistent encoding <> code-page handling (including Scintilla's code-page settings) - Scintilla issue regarding notepad2-mod issue #173 (see https://github.com/XhmikosR/notepad2-mod/pull/193) - allow arbitrary conversion between encodings (even if it does not make sense in every case) (instead of silently doing nothing except changing the encoding info on the status bar) --- scintilla/doc/ScintillaDoc.html | 3 +- scintilla/doc/ScintillaHistory.html | 4 +- scintilla/win32/ScintillaWin.cxx | 3 +- src/Edit.c | 203 +++++++++++++++------------- src/Edit.h | 10 +- src/Helpers.c | 15 -- src/Helpers.h | 2 +- src/Notepad3.c | 65 ++++----- src/Notepad3.rc | 6 +- src/Styles.c | 5 +- 10 files changed, 160 insertions(+), 156 deletions(-) diff --git a/scintilla/doc/ScintillaDoc.html b/scintilla/doc/ScintillaDoc.html index a7e7eba36..a588e513f 100644 --- a/scintilla/doc/ScintillaDoc.html +++ b/scintilla/doc/ScintillaDoc.html @@ -2897,7 +2897,8 @@

Style definition

number used by the C and C++ lexer to display literal strings; it has the value 6). This feature works differently on Windows and GTK+.
The default character set is SC_CHARSET_DEFAULT.

-

SC_CHARSET_ANSI and SC_CHARSET_DEFAULT specify European Windows code page 1252 unless the code page is set.

+

SC_CHARSET_ANSI specifies European Windows code page 1252.

+

SC_CHARSET_DEFAULT specifies the system-default code page unless the code page is set.

diff --git a/scintilla/doc/ScintillaHistory.html b/scintilla/doc/ScintillaHistory.html index 683a2c4a5..e6ea0f66e 100644 --- a/scintilla/doc/ScintillaHistory.html +++ b/scintilla/doc/ScintillaHistory.html @@ -1,4 +1,4 @@ - + @@ -1028,7 +1028,7 @@

C++11 range-based for loops used in SciTE so GCC 4.6 is now the minimum supported version.
  • - SC_CHARSET_DEFAULT now means code page 1252 on Windows unless a code page is set. + (REVERTED in Notepad2-mod) SC_CHARSET_DEFAULT now means code page 1252 on Windows unless a code page is set. This prevents unexpected behaviour and crashes on East Asian systems where default locales are commonly DBCS. Projects which want to default to DBCS code pages in East Asian locales should set the code page and character set explicitly. diff --git a/scintilla/win32/ScintillaWin.cxx b/scintilla/win32/ScintillaWin.cxx index 81f288c72..0b47c36ed 100644 --- a/scintilla/win32/ScintillaWin.cxx +++ b/scintilla/win32/ScintillaWin.cxx @@ -1144,7 +1144,8 @@ UINT CodePageFromCharSet(DWORD characterSet, UINT documentCodePage) { } switch (characterSet) { case SC_CHARSET_ANSI: return 1252; - case SC_CHARSET_DEFAULT: return documentCodePage ? documentCodePage : 1252; + //case SC_CHARSET_DEFAULT: return documentCodePage ? documentCodePage : 1252; // SCI orig + case SC_CHARSET_DEFAULT: return documentCodePage; case SC_CHARSET_BALTIC: return 1257; case SC_CHARSET_CHINESEBIG5: return 950; case SC_CHARSET_EASTEUROPE: return 1250; diff --git a/src/Edit.c b/src/Edit.c index 9088d25a6..0185a1bcd 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -57,9 +57,9 @@ extern BOOL bFixLineEndings; extern BOOL bAutoStripBlanks; -// Internal Codepage and Character Set -extern int iInternalCodePage; -extern int iInternalCharSet; +// Default Codepage and Character Set +extern int iDefaultCodePage; +extern int iDefaultCharSet; extern BOOL bSkipUnicodeDetection; extern BOOL bLoadASCIIasUTF8; extern BOOL bLoadNFOasOEM; @@ -73,15 +73,15 @@ WCHAR wchANSI[8] = L""; WCHAR wchOEM [8] = L""; NP2ENCODING mEncoding[] = { - { NCP_DEFAULT|NCP_RECODE, 0, "ansi,ansi,ascii,", 61000, L"" }, - { NCP_8BIT|NCP_RECODE, 0, "oem,oem,", 61001, L"" }, - { NCP_UNICODE|NCP_UNICODE_BOM, 0, "", 61002, L"" }, - { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM, 0, "", 61003, L"" }, - { NCP_UNICODE|NCP_RECODE, 0, "utf-16,utf16,unicode,", 
61004, L"" }, - { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_RECODE, 0, "utf-16be,utf16be,unicodebe,", 61005, L"" }, - { NCP_UTF8|NCP_RECODE, CP_UTF8, "utf-8,utf8,", 61006, L"" }, - { NCP_UTF8|NCP_UTF8_SIGN, CP_UTF8, "utf-8,utf8,", 61007, L"" }, - { NCP_8BIT|NCP_RECODE, CP_UTF7, "utf-7,utf7,", 61008, L"" }, + { NCP_ANSI|NCP_RECODE, CP_ACP, "ansi,ansi,ascii,", 61000, L"" }, + { NCP_8BIT|NCP_RECODE, CP_OEMCP, "oem,oem,", 61001, L"" }, + { NCP_UNICODE|NCP_UNICODE_BOM, CP_UTF8, "", 61002, L"" }, + { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM, CP_UTF8, "", 61003, L"" }, + { NCP_UNICODE|NCP_RECODE, CP_UTF8, "utf-16,utf16,unicode,", 61004, L"" }, + { NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_RECODE, CP_UTF8, "utf-16be,utf16be,unicodebe,", 61005, L"" }, + { NCP_UTF8|NCP_RECODE, CP_UTF8, "utf-8,utf8,", 61006, L"" }, + { NCP_UTF8|NCP_UTF8_SIGN, CP_UTF8, "utf-8,utf8,", 61007, L"" }, + { NCP_8BIT|NCP_RECODE, CP_UTF7, "utf-7,utf7,", 61008, L"" }, { NCP_8BIT|NCP_RECODE, 720, "DOS-720,dos720,", 61009, L"" }, { NCP_8BIT|NCP_RECODE, 28596, "iso-8859-6,iso88596,arabic,csisolatinarabic,ecma114,isoir127,", 61010, L"" }, { NCP_8BIT|NCP_RECODE, 10004, "x-mac-arabic,xmacarabic,", 61011, L"" }, @@ -238,7 +238,7 @@ HWND EditCreate(HWND hwndParent) g_hInstance, NULL); - SetInternalCodePage(hwnd,iInternalCodePage); + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iDefaultEncoding),0); SendMessage(hwnd,SCI_SETEOLMODE,SC_EOL_CRLF,0); SendMessage(hwnd,SCI_SETPASTECONVERTENDINGS,1,0); SendMessage(hwnd,SCI_SETMODEVENTMASK,/*SC_MODEVENTMASKALL*/SC_MOD_INSERTTEXT|SC_MOD_DELETETEXT,0); @@ -325,6 +325,9 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint) if (encSource == encDest) return(TRUE); + if (!(Encoding_IsValid(encSource) && Encoding_IsValid(encDest))) + return(FALSE); + length = (int)SendMessage(hwnd,SCI_GETLENGTH,0,0); if (length == 0) { @@ -333,7 +336,7 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint) 
SendMessage(hwnd,SCI_EMPTYUNDOBUFFER,0,0); SendMessage(hwnd,SCI_CLEARALL,0,0); SendMessage(hwnd,SCI_MARKERDELETEALL,(WPARAM)-1,0); - SetInternalCodePage(hwnd, encDest); + SendMessage(hwnd,SCI_SETCODEPAGE, Encoding_GetSciCodePage(encDest),0); SendMessage(hwnd,SCI_SETUNDOCOLLECTION,1,0); SendMessage(hwnd,EM_EMPTYUNDOBUFFER,0,0); SendMessage(hwnd,SCI_GOTOPOS,0,0); @@ -344,6 +347,7 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint) } else { + pchText = GlobalAlloc(GPTR,length*5+2); tr.lpstrText = pchText; @@ -351,17 +355,17 @@ BOOL EditConvertText(HWND hwnd,int encSource,int encDest,BOOL bSetSavePoint) pwchText = GlobalAlloc(GPTR,length*3+2); - UINT cpSrc = (encSource == CPI_ANSI) ? CP_ACP : ((encSource == CPI_OEM) ? CP_OEMCP : ((encSource == CPI_UTF7) ? CP_UTF7 : CP_UTF8)); - UINT cpDst = (encDest == CPI_ANSI) ? CP_ACP : ((encDest == CPI_OEM) ? CP_OEMCP : ((encDest == CPI_UTF7) ? CP_UTF7 : CP_UTF8)); - cbwText = MultiByteToWideChar(cpSrc,0,pchText,length,pwchText,length*3+2); - cbText = WideCharToMultiByte(cpDst,0,pwchText,cbwText,pchText,length*5+2,NULL,NULL); + UINT cpSrc = mEncoding[encSource].uCodePage; + UINT cpDst = mEncoding[encDest].uCodePage; + cbwText = MultiByteToWideChar(cpSrc,0,pchText,length,pwchText,length*3+2); + cbText = WideCharToMultiByte(cpDst,0,pwchText,cbwText,pchText,length*5+2,NULL,NULL); SendMessage(hwnd,SCI_CANCEL,0,0); SendMessage(hwnd,SCI_SETUNDOCOLLECTION,0,0); SendMessage(hwnd,SCI_EMPTYUNDOBUFFER,0,0); SendMessage(hwnd,SCI_CLEARALL,0,0); SendMessage(hwnd,SCI_MARKERDELETEALL,(WPARAM)-1,0); - SetInternalCodePage(hwnd, encDest); + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(encDest),0); SendMessage(hwnd,SCI_ADDTEXT,cbText,(LPARAM)pchText); SendMessage(hwnd,SCI_EMPTYUNDOBUFFER,0,0); SendMessage(hwnd,SCI_SETUNDOCOLLECTION,1,0); @@ -383,43 +387,39 @@ BOOL EditSetNewEncoding(HWND hwnd,int iCurrentEncoding,int iNewEncoding,BOOL bNo if (iCurrentEncoding != iNewEncoding) { - if (iCurrentEncoding != CPI_ANSI 
&& iNewEncoding != CPI_ANSI) - return(TRUE); // can only convert from-and-to ANSI - - if (SendMessage(hwnd,SCI_GETLENGTH,0,0) == 0) { - - BOOL bIsEmptyUndoHistory = (SendMessage(hwnd,SCI_CANUNDO,0,0) == 0 && SendMessage(hwnd,SCI_CANREDO,0,0) == 0); + BOOL bOneEncodingIsANSI = (Encoding_IsANSI(iCurrentEncoding) || Encoding_IsANSI(iNewEncoding)); + BOOL bBothEncodingsAreANSI = (Encoding_IsANSI(iCurrentEncoding) && Encoding_IsANSI(iNewEncoding)); + + // conversion between arbirtaty encodings may lead to unexpected results + if (!bOneEncodingIsANSI || bBothEncodingsAreANSI) { + // ~ return(TRUE); // this would imply a successful conversion - it is not ! + // return(FALSE); // commented out ? : allow conversion between arbirtaty encodings + } + + if (SendMessage(hwnd, SCI_GETLENGTH, 0, 0) == 0) { - if ((iCurrentEncoding == CPI_ANSI || iNewEncoding == CPI_ANSI) && - (bNoUI || bIsEmptyUndoHistory || InfoBox(MBYESNO,L"MsgConv2",IDS_ASK_ENCODING2) == IDYES)) { + BOOL bIsEmptyUndoHistory = (SendMessage(hwnd, SCI_CANUNDO, 0, 0) == 0 && SendMessage(hwnd, SCI_CANREDO, 0, 0) == 0); - EditConvertText(hwnd, iCurrentEncoding, iNewEncoding, bSetSavePoint); + BOOL doNewEncoding = (!bIsEmptyUndoHistory && !bNoUI) ? + (InfoBox(MBYESNO, L"MsgConv2", IDS_ASK_ENCODING2) == IDYES) : TRUE; - return(TRUE); + if (doNewEncoding) { + return EditConvertText(hwnd,iCurrentEncoding,iNewEncoding,bSetSavePoint); } - - else - return(FALSE); } - - else if ((iCurrentEncoding == CPI_ANSI || iNewEncoding == CPI_ANSI) && - (bNoUI || InfoBox(MBYESNO,L"MsgConv1",IDS_ASK_ENCODING) == IDYES)) { - - BeginWaitCursor(); - - EditConvertText(hwnd, iCurrentEncoding, iNewEncoding, FALSE); - - EndWaitCursor(); - - return(TRUE); + else { + + BOOL doNewEncoding = (!bNoUI) ? 
(InfoBox(MBYESNO, L"MsgConv1", IDS_ASK_ENCODING) == IDYES) : TRUE; + + if (doNewEncoding) { + BeginWaitCursor(); + BOOL result = EditConvertText(hwnd,iCurrentEncoding,iNewEncoding,FALSE); + EndWaitCursor(); + return(result); + } } - - else - return(FALSE); - } - - else - return(FALSE); + } + return(FALSE); } @@ -637,7 +637,7 @@ int Encoding_MapIniSetting(BOOL bLoad,int iSetting) { if (bLoad) { switch (iSetting) { case -1: return CPI_NONE; - case 0: return CPI_ANSI; + case 0: return CPI_ANSI_DEFAULT; case 1: return CPI_UNICODEBOM; case 2: return CPI_UNICODEBEBOM; case 3: return CPI_UTF8; @@ -651,14 +651,14 @@ int Encoding_MapIniSetting(BOOL bLoad,int iSetting) { if ((mEncoding[i].uCodePage == (UINT)iSetting) && Encoding_IsValid(i)) return(i); } - return CPI_UTF8; // not found + return CPI_ANSI_DEFAULT; } } } else { switch (iSetting) { case CPI_NONE: return -1; - case CPI_ANSI: return 0; + case CPI_ANSI_DEFAULT: return 0; case CPI_UNICODEBOM: return 1; case CPI_UNICODEBEBOM: return 2; case CPI_UTF8: return 3; @@ -667,7 +667,12 @@ int Encoding_MapIniSetting(BOOL bLoad,int iSetting) { case CPI_UNICODE: return 6; case CPI_UNICODEBE: return 7; case CPI_UTF7: return 8; - default: return(mEncoding[iSetting].uCodePage); + default: { + if (Encoding_IsValid(iSetting)) + return(mEncoding[iSetting].uCodePage); + else + return CPI_ANSI_DEFAULT; + } } } } @@ -783,7 +788,7 @@ void Encoding_AddToListView(HWND hwnd,int idSel,BOOL bRecodeOnly) else StrCpyN(wchBuf,pEE[i].wch,COUNTOF(wchBuf)); - if (id == CPI_ANSI) + if (Encoding_IsANSI(id)) StrCatN(wchBuf,wchANSI,COUNTOF(wchBuf)); else if (id == CPI_OEM) StrCatN(wchBuf,wchOEM,COUNTOF(wchBuf)); @@ -875,7 +880,7 @@ void Encoding_AddToComboboxEx(HWND hwnd,int idSel,BOOL bRecodeOnly) else StrCpyN(wchBuf,pEE[i].wch,COUNTOF(wchBuf)); - if (id == CPI_ANSI) + if (Encoding_IsANSI(id)) StrCatN(wchBuf,wchANSI,COUNTOF(wchBuf)); else if (id == CPI_OEM) StrCatN(wchBuf,wchOEM,COUNTOF(wchBuf)); @@ -921,6 +926,25 @@ BOOL 
Encoding_GetFromComboboxEx(HWND hwnd,int *pidEncoding) } +BOOL Encoding_IsDefault(int iEncoding) +{ + return (mEncoding[iEncoding].uFlags & NCP_DEFAULT); +} + +BOOL Encoding_IsANSI(int iEncoding) +{ + return (mEncoding[iEncoding].uFlags & NCP_ANSI); +} + +int Encoding_GetSciCodePage(int iEncoding) +{ + if (Encoding_IsDefault(iEncoding)) + return iDefaultCodePage; + + return (Encoding_IsANSI(iEncoding)) ? 0 : SC_CP_UTF8; +} + + BOOL IsUnicode(const char* pBuffer,int cb,LPBOOL lpbBOM,LPBOOL lpbReverse) { int i = 0xFFFF; @@ -1185,13 +1209,14 @@ BOOL EditLoadFile( char* lpData; DWORD cbData; //char *cp; - int _iDefaultEncoding; + int _iPrefEncoding; BOOL bBOM; BOOL bReverse = FALSE; BOOL bPreferOEM = FALSE; + *iEncoding = CPI_ANSI_DEFAULT; *pbUnicodeErr = FALSE; *pbFileTooBig = FALSE; @@ -1205,8 +1230,8 @@ BOOL EditLoadFile( dwLastIOError = GetLastError(); if (hFile == INVALID_HANDLE_VALUE) { - iSrcEncoding = -1; - iWeakSrcEncoding = -1; + iSrcEncoding = CPI_NONE; + iWeakSrcEncoding = CPI_NONE; return FALSE; } @@ -1220,8 +1245,8 @@ BOOL EditLoadFile( if (InfoBox(MBYESNO,L"MsgFileSizeWarning",IDS_WARNLOADBIGFILE) != IDYES) { CloseHandle(hFile); *pbFileTooBig = TRUE; - iSrcEncoding = -1; - iWeakSrcEncoding = -1; + iSrcEncoding = CPI_NONE; + iWeakSrcEncoding = CPI_NONE; return FALSE; } } @@ -1233,8 +1258,8 @@ BOOL EditLoadFile( if (!bReadSuccess) { GlobalFree(lpData); - iSrcEncoding = -1; - iWeakSrcEncoding = -1; + iSrcEncoding = CPI_NONE; + iWeakSrcEncoding = CPI_NONE; return FALSE; } @@ -1245,34 +1270,29 @@ BOOL EditLoadFile( bPreferOEM = TRUE; } - if (!Encoding_IsValid(iDefaultEncoding)) - iDefaultEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI; - - _iDefaultEncoding = (bPreferOEM) ? g_DOSEncoding : iDefaultEncoding; - if (iWeakSrcEncoding != -1 && Encoding_IsValid(iWeakSrcEncoding)) - _iDefaultEncoding = iWeakSrcEncoding; - - *iEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI; + _iPrefEncoding = (bPreferOEM) ? 
g_DOSEncoding : iDefaultEncoding; + if (Encoding_IsValid(iWeakSrcEncoding)) + _iPrefEncoding = iWeakSrcEncoding; if (cbData == 0) { FileVars_Init(NULL,0,&fvCurFile); *iEOLMode = iLineEndings[iDefaultEOLMode]; - if (iSrcEncoding == -1) { + if (iSrcEncoding == CPI_NONE) { if (bLoadASCIIasUTF8 && !bPreferOEM) *iEncoding = CPI_UTF8; else - *iEncoding = _iDefaultEncoding; + *iEncoding = _iPrefEncoding; } else *iEncoding = iSrcEncoding; - SetInternalCodePage(hwnd, *iEncoding); + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0); EditSetNewText(hwnd,"",0); SendMessage(hwnd,SCI_SETEOLMODE,iLineEndings[iDefaultEOLMode],0); GlobalFree(lpData); } - else if (!bSkipEncodingDetection && (iSrcEncoding == -1 || iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE) && + else if (!bSkipEncodingDetection && (iSrcEncoding == CPI_NONE || iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE) && (iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE || IsUnicode(lpData,cbData,&bBOM,&bReverse)) && (iSrcEncoding == CPI_UNICODE || iSrcEncoding == CPI_UNICODEBE || !IsUTF8Signature(lpData))) // check for UTF-8 signature { @@ -1310,7 +1330,7 @@ BOOL EditLoadFile( } GlobalFree(lpData); - SetInternalCodePage(hwnd, SC_CP_UTF8); + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0); EditSetNewText(hwnd,"",0); FileVars_Init(lpDataUTF8,cbData-1,&fvCurFile); EditSetNewText(hwnd,lpDataUTF8,cbData-1); @@ -1320,7 +1340,7 @@ BOOL EditLoadFile( else { FileVars_Init(lpData,cbData,&fvCurFile); - if (!bSkipEncodingDetection && (iSrcEncoding == -1 || iSrcEncoding == CPI_UTF8 || iSrcEncoding == CPI_UTF8SIGN) && + if (!bSkipEncodingDetection && (iSrcEncoding == CPI_NONE || iSrcEncoding == CPI_UTF8 || iSrcEncoding == CPI_UTF8SIGN) && ((IsUTF8Signature(lpData) || FileVars_IsUTF8(&fvCurFile) || (iSrcEncoding == CPI_UTF8 || iSrcEncoding == CPI_UTF8SIGN) || @@ -1328,11 +1348,11 @@ BOOL EditLoadFile( (((UTF8_mbslen_bytes(UTF8StringStart(lpData)) - 1 != 
UTF8_mbslen(UTF8StringStart(lpData),IsUTF8Signature(lpData) ? cbData-3 : cbData)) || (!bPreferOEM && ( - mEncoding[_iDefaultEncoding].uFlags & NCP_UTF8 || + mEncoding[_iPrefEncoding].uFlags & NCP_UTF8 || bLoadASCIIasUTF8 )) ))))) && !(FileVars_IsNonUTF8(&fvCurFile) && (iSrcEncoding != CPI_UTF8 && iSrcEncoding != CPI_UTF8SIGN))) { - SetInternalCodePage(hwnd, SC_CP_UTF8); + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(CPI_UTF8),0); EditSetNewText(hwnd,"",0); if (IsUTF8Signature(lpData)) { EditSetNewText(hwnd,UTF8StringStart(lpData),cbData-3); @@ -1353,20 +1373,20 @@ BOOL EditLoadFile( LPWSTR lpDataWide; int cbDataWide; - if (iSrcEncoding != -1) + if (iSrcEncoding != CPI_NONE) *iEncoding = iSrcEncoding; else { *iEncoding = FileVars_GetEncoding(&fvCurFile); - if (*iEncoding == -1) { + if (*iEncoding == CPI_NONE) { if (fvCurFile.mask & FV_ENCODING) - *iEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI; + *iEncoding = CPI_ANSI_DEFAULT; else { - if (iWeakSrcEncoding == -1) - *iEncoding = _iDefaultEncoding; + if (iWeakSrcEncoding == CPI_NONE) + *iEncoding = _iPrefEncoding; else if (mEncoding[iWeakSrcEncoding].uFlags & NCP_INTERNAL) *iEncoding = iDefaultEncoding; else - *iEncoding = _iDefaultEncoding; + *iEncoding = _iPrefEncoding; } } } @@ -1385,18 +1405,16 @@ BOOL EditLoadFile( cbData = WideCharToMultiByte(CP_UTF8,0,lpDataWide,cbDataWide,lpData,(int)GlobalSize(lpData),NULL,NULL); GlobalFree(lpDataWide); - SetInternalCodePage(hwnd, SC_CP_UTF8); + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0); EditSetNewText(hwnd,"",0); EditSetNewText(hwnd,lpData,cbData); *iEOLMode = EditDetectEOLMode(hwnd,lpData,cbData); GlobalFree(lpData); } - else { - - *iEncoding = bLoadASCIIasUTF8 ? 
CPI_UTF8 : CPI_ANSI; - SetInternalCodePage(hwnd, *iEncoding); + *iEncoding = iDefaultEncoding; + SendMessage(hwnd,SCI_SETCODEPAGE,Encoding_GetSciCodePage(*iEncoding),0); EditSetNewText(hwnd,"",0); EditSetNewText(hwnd,lpData,cbData); *iEOLMode = EditDetectEOLMode(hwnd,lpData,cbData); @@ -1473,11 +1491,6 @@ BOOL EditSaveFile( if (bAutoStripBlanks) EditStripTrailingBlanks(hwnd,TRUE); - // convert to ANSI, if CP is set to UTF8 - if ((iEncoding == CPI_ANSI) && (iInternalCodePage == SC_CP_UTF8)) { - EditSetNewEncoding(hwnd, CPI_UTF8, CPI_ANSI, FALSE, TRUE); - } - // get text cbData = (int)SendMessage(hwnd,SCI_GETLENGTH,0,0); lpData = GlobalAlloc(GPTR, cbData + 4); //fix: +bom diff --git a/src/Edit.h b/src/Edit.h index 0ace8af70..4625a7297 100644 --- a/src/Edit.h +++ b/src/Edit.h @@ -137,10 +137,11 @@ extern int g_DOSEncoding; #define NCP_UNICODE_REVERSE 16 #define NCP_UNICODE_BOM 32 #define NCP_8BIT 64 -#define NCP_INTERNAL (NCP_DEFAULT|NCP_UTF8|NCP_UTF8_SIGN|NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM) -#define NCP_RECODE 128 +#define NCP_ANSI 128 +#define NCP_INTERNAL (NCP_DEFAULT|NCP_UTF8|NCP_UTF8_SIGN|NCP_UNICODE|NCP_UNICODE_REVERSE|NCP_UNICODE_BOM|NCP_ANSI) +#define NCP_RECODE 256 #define CPI_NONE -1 -#define CPI_ANSI 0 +#define CPI_ANSI_DEFAULT 0 #define CPI_OEM 1 #define CPI_UNICODEBOM 2 #define CPI_UNICODEBEBOM 3 @@ -171,6 +172,9 @@ void Encoding_AddToListView(HWND,int,BOOL); BOOL Encoding_GetFromListView(HWND,int *); void Encoding_AddToComboboxEx(HWND,int,BOOL); BOOL Encoding_GetFromComboboxEx(HWND,int *); +BOOL Encoding_IsDefault(int); +BOOL Encoding_IsANSI(int); +int Encoding_GetSciCodePage(int); BOOL IsUnicode(const char*,int,LPBOOL,LPBOOL); BOOL IsUTF8(const char*,int); diff --git a/src/Helpers.c b/src/Helpers.c index 2754a459a..2c2deb935 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -27,11 +27,6 @@ #include #include "helpers.h" #include "resource.h" -#include "Edit.h" - - 
-//============================================================================= -extern iInternalCodePage; //============================================================================= @@ -496,16 +491,6 @@ void SetWindowTransparentMode(HWND hwnd,BOOL bTransparentMode) } -//============================================================================= -// -// SetInternalCodePage() -// -void SetInternalCodePage(HWND hwnd, int encoding) { - iInternalCodePage = (encoding == CPI_ANSI) ? 0 : SC_CP_UTF8; - SendMessage(hwnd, SCI_SETCODEPAGE, iInternalCodePage, 0); -} - - //============================================================================= // // CenterDlgInParent() diff --git a/src/Helpers.h b/src/Helpers.h index 8c1356b79..d09b22a25 100644 --- a/src/Helpers.h +++ b/src/Helpers.h @@ -83,7 +83,7 @@ BOOL IsFontAvailable(LPCWSTR); BOOL SetWindowTitle(HWND,UINT,BOOL,UINT,LPCWSTR,int,BOOL,UINT,BOOL,LPCWSTR); void SetWindowTransparentMode(HWND,BOOL); -void SetInternalCodePage(HWND,int); + void CenterDlgInParent(HWND); void GetDlgPos(HWND,LPINT,LPINT); diff --git a/src/Notepad3.c b/src/Notepad3.c index 2a96df52a..1f2bec603 100644 --- a/src/Notepad3.c +++ b/src/Notepad3.c @@ -222,8 +222,8 @@ int iEncoding; int iOriginalEncoding; int iEOLMode; -int iInternalCodePage; -int iInternalCharSet; +int iDefaultCodePage; +int iDefaultCharSet; int iInitialLine; int iInitialColumn; @@ -919,7 +919,7 @@ HWND InitInstance(HINSTANCE hInstance,LPSTR pszCmdLine,int nCmdShow) if (iSrcEncoding != -1) { iEncoding = iSrcEncoding; iOriginalEncoding = iSrcEncoding; - SetInternalCodePage(hwndEdit, iEncoding); + SendMessage(hwndEdit,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iEncoding),0); } } @@ -2097,7 +2097,7 @@ void MsgInitMenu(HWND hwnd,WPARAM wParam,LPARAM lParam) i = IDM_ENCODING_UTF8SIGN; else if (mEncoding[iEncoding].uFlags & NCP_UTF8) i = IDM_ENCODING_UTF8; - else if (mEncoding[iEncoding].uFlags & NCP_DEFAULT) + else if (mEncoding[iEncoding].uFlags & NCP_ANSI) i = IDM_ENCODING_ANSI; else 
i = -1; @@ -2750,7 +2750,7 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam) case IDM_ENCODING_UNICODEREV: iNewEncoding = CPI_UNICODEBEBOM; break; case IDM_ENCODING_UTF8: iNewEncoding = CPI_UTF8; break; case IDM_ENCODING_UTF8SIGN: iNewEncoding = CPI_UTF8SIGN; break; - case IDM_ENCODING_ANSI: iNewEncoding = CPI_ANSI; break; + case IDM_ENCODING_ANSI: iNewEncoding = CPI_ANSI_DEFAULT; break; } } @@ -2763,7 +2763,7 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam) iOriginalEncoding = iNewEncoding; } else { - if (iEncoding == CPI_ANSI || iNewEncoding == CPI_ANSI) + if (Encoding_IsANSI(iEncoding) || Encoding_IsANSI(iNewEncoding)) iOriginalEncoding = CPI_NONE; iEncoding = iNewEncoding; } @@ -2785,21 +2785,21 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam) WCHAR tchCurFile2[MAX_PATH]; - int iNewEncoding = CPI_NONE; - if (iEncoding != CPI_ANSI) - iNewEncoding = iEncoding; + // file to ANSI is default loading behaviour, recoding does not make sense + int iNewEncoding = Encoding_IsANSI(iEncoding) ? 
CPI_NONE : iEncoding; + if (iEncoding == CPI_UTF8SIGN) iNewEncoding = CPI_UTF8; - if (iEncoding == CPI_UNICODEBOM) + else if (iEncoding == CPI_UNICODEBOM) iNewEncoding = CPI_UNICODE; - if (iEncoding == CPI_UNICODEBEBOM) + else if (iEncoding == CPI_UNICODEBEBOM) iNewEncoding = CPI_UNICODEBE; if ((bModified || iEncoding != iOriginalEncoding) && MsgBox(MBOKCANCEL,IDS_ASK_RECODE) != IDOK) return(0); - if (RecodeDlg(hwnd,&iNewEncoding)) { - + if (RecodeDlg(hwnd,&iNewEncoding)) + { lstrcpy(tchCurFile2,szCurFile); iSrcEncoding = iNewEncoding; FileLoad(TRUE,FALSE,TRUE,FALSE,tchCurFile2); @@ -4517,7 +4517,7 @@ LRESULT MsgCommand(HWND hwnd,WPARAM wParam,LPARAM lParam) { WCHAR tchCurFile2[MAX_PATH]; if (lstrlen(szCurFile)) { - iSrcEncoding = CPI_ANSI; + iSrcEncoding = CPI_ANSI_DEFAULT; lstrcpy(tchCurFile2,szCurFile); FileLoad(FALSE,FALSE,TRUE,FALSE,tchCurFile2); } @@ -5685,21 +5685,22 @@ void LoadSettings() bViewEOLs = IniSectionGetInt(pIniSection,L"ViewEOLs",0); if (bViewEOLs) bViewEOLs = 1; - // prefered default encoding is UTF-8 (used for internal codepage too: SC_CP_UTF8) - iDefaultEncoding = IniSectionGetInt(pIniSection,L"DefaultEncoding", CPI_NONE); + iDefaultEncoding = IniSectionGetInt(pIniSection,L"DefaultEncoding", CPI_ANSI_DEFAULT); + // if DefaultEncoding is defined as CPI_NONE(-1) explicitly, set to system's current code-page iDefaultEncoding = (iDefaultEncoding == CPI_NONE) ? Encoding_MapIniSetting(TRUE, (int)GetACP()) : Encoding_MapIniSetting(TRUE, iDefaultEncoding); - if (!Encoding_IsValid(iDefaultEncoding)) iDefaultEncoding = CPI_UTF8; - - bLoadASCIIasUTF8 = IniSectionGetInt(pIniSection,L"LoadASCIIasUTF8",0); - if (bLoadASCIIasUTF8) bLoadASCIIasUTF8 = 1; - // re-adjust ANSI encoding - iDefaultEncoding = ((iDefaultEncoding == CPI_ANSI) && (bLoadASCIIasUTF8 == 1)) ? 
CPI_UTF8 : iDefaultEncoding; + if (!Encoding_IsValid(iDefaultEncoding)) + iDefaultEncoding = CPI_ANSI_DEFAULT; + // set flag for encoding default + mEncoding[iDefaultEncoding].uFlags |= NCP_DEFAULT; bSkipUnicodeDetection = IniSectionGetInt(pIniSection, L"SkipUnicodeDetection", 0); if (bSkipUnicodeDetection) bSkipUnicodeDetection = 1; + bLoadASCIIasUTF8 = IniSectionGetInt(pIniSection, L"LoadASCIIasUTF8", 0); + if (bLoadASCIIasUTF8) bLoadASCIIasUTF8 = 1; + bLoadNFOasOEM = IniSectionGetInt(pIniSection,L"LoadNFOasOEM",1); if (bLoadNFOasOEM) bLoadNFOasOEM = 1; @@ -5858,22 +5859,22 @@ void LoadSettings() LocalFree(pIniSection); - // define scintilla internal code page - iInternalCodePage = (iDefaultEncoding == CPI_ANSI) ? 0 : SC_CP_UTF8; - + // define scintilla internal code page, don't use Encoding_GetSciCodePage(iDefaultEncoding) here + iDefaultCodePage = (iDefaultEncoding == CPI_ANSI_DEFAULT) ? 0 : SC_CP_UTF8; { // check for Chinese, Japan, Korean DBCS code pages and switch accordingly int acp = (int)GetACP(); if (acp == 932 || acp == 936 || acp == 949 || acp == 950 || acp == 1361) - iInternalCodePage = acp; + iDefaultCodePage = acp; } + { CHARSETINFO ci; - if (TranslateCharsetInfo((DWORD*)(UINT_PTR)iInternalCodePage, &ci, TCI_SRCCODEPAGE)) - iInternalCharSet = ci.ciCharset; + if (TranslateCharsetInfo((DWORD*)(UINT_PTR)iDefaultCodePage, &ci, TCI_SRCCODEPAGE)) + iDefaultCharSet = ci.ciCharset; else - iInternalCharSet = ANSI_CHARSET; + iDefaultCharSet = ANSI_CHARSET; } // Scintilla Styles @@ -5949,8 +5950,8 @@ void SaveSettings(BOOL bSaveSettingsNow) IniSectionSetInt(pIniSection,L"ViewWhiteSpace",bViewWhiteSpace); IniSectionSetInt(pIniSection,L"ViewEOLs",bViewEOLs); IniSectionSetInt(pIniSection,L"DefaultEncoding",Encoding_MapIniSetting(FALSE,iDefaultEncoding)); - IniSectionSetInt(pIniSection,L"LoadASCIIasUTF8",bLoadASCIIasUTF8); IniSectionSetInt(pIniSection,L"SkipUnicodeDetection",bSkipUnicodeDetection); + 
IniSectionSetInt(pIniSection,L"LoadASCIIasUTF8",bLoadASCIIasUTF8); IniSectionSetInt(pIniSection,L"LoadNFOasOEM",bLoadNFOasOEM); IniSectionSetInt(pIniSection,L"NoEncodingTags",bNoEncodingTags); IniSectionSetInt(pIniSection,L"DefaultEOLMode",iDefaultEOLMode); @@ -6950,7 +6951,7 @@ BOOL FileLoad(BOOL bDontSave,BOOL bNew,BOOL bReload,BOOL bNoEncDetect,LPCWSTR lp SendMessage(hwndEdit,SCI_SETEOLMODE,iLineEndings[iDefaultEOLMode],0); iEncoding = iDefaultEncoding; iOriginalEncoding = iDefaultEncoding; - SetInternalCodePage(hwndEdit, iEncoding); + SendMessage(hwndEdit,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iDefaultEncoding),0); EditSetNewText(hwndEdit,"",0); SetWindowTitle(hwndMain,uidsAppTitle,fIsElevated,IDS_UNTITLED,szCurFile, iPathNameFormat,bModified || iEncoding != iOriginalEncoding, @@ -7015,7 +7016,7 @@ BOOL FileLoad(BOOL bDontSave,BOOL bNew,BOOL bReload,BOOL bNoEncDetect,LPCWSTR lp iEncoding = iDefaultEncoding; iOriginalEncoding = iDefaultEncoding; } - SetInternalCodePage(hwndEdit, iEncoding); + SendMessage(hwndEdit,SCI_SETCODEPAGE,Encoding_GetSciCodePage(iEncoding),0); bReadOnly = FALSE; EditSetNewText(hwndEdit,"",0); } diff --git a/src/Notepad3.rc b/src/Notepad3.rc index e363faec1..815017b32 100644 --- a/src/Notepad3.rc +++ b/src/Notepad3.rc @@ -507,7 +507,7 @@ BEGIN VK_F8, IDM_ENCODING_RECODE, VIRTKEY, NOINVERT VK_F8, IDM_EDIT_INSERT_ENCODING, VIRTKEY, CONTROL, NOINVERT VK_F8, CMD_RELOADNOFILEVARS, VIRTKEY, ALT, NOINVERT - VK_F8, CMD_RELOADASCIIASUTF8, VIRTKEY, SHIFT, NOINVERT + VK_F8, IDM_ENCODING_UTF8, VIRTKEY, SHIFT, NOINVERT VK_F9, IDM_ENCODING_SELECT, VIRTKEY, NOINVERT VK_F9, IDM_EDIT_INSERT_FILENAME, VIRTKEY, CONTROL, NOINVERT VK_F9, IDM_FILE_MANAGEFAV, VIRTKEY, ALT, NOINVERT @@ -1421,8 +1421,8 @@ BEGIN IDS_FIND_WRAPRE "Reached the beginning of the document, restarting search at the end." IDS_NOTFOUND "The specified text was not found." IDS_REPLCOUNT "%i occurrences of the specified text have been replaced." 
- IDS_ASK_ENCODING "Switching the file encoding from ANSI to non-ANSI (and vice versa) may replace unsupported text with default characters, and the undo history will be cleared. Continue?" - IDS_ASK_ENCODING2 "You are about to change the encoding of an empty file from ANSI to non-ANSI. Note that this will clear the undo history, as it can't be synchronized with the new encoding. Continue?" + IDS_ASK_ENCODING "Switching the file encoding from one encoding to another may replace unsupported text with default characters, and the undo history will be cleared. Continue?" + IDS_ASK_ENCODING2 "You are about to change the encoding of an empty file. Note that this will clear the undo history, as it can't be synchronized with the new encoding. Continue?" IDS_ERR_ENCODINGNA "Code page conversion tables for the selected encoding are not available on your system." IDS_ERR_UNICODE "Error converting this Unicode file.\nData will be lost if the file is saved!" END diff --git a/src/Styles.c b/src/Styles.c index ecf935f2a..0c60d1b15 100644 --- a/src/Styles.c +++ b/src/Styles.c @@ -70,7 +70,6 @@ EDITLEXER lexDefault = { SCLEX_NULL, 63000, L"Default Text", L"txt; text; wtx; /* 23 */ { SCI_SETEXTRAASCENT+SCI_SETEXTRADESCENT, 63123, L"2nd Extra Line Spacing (Size)", L"", L"" }, { -1, 00000, L"", L"", L"" } } }; - KEYWORDLIST KeyWords_HTML = { "!doctype ^aria- ^data- a abbr accept accept-charset accesskey acronym action address align alink " "alt and applet archive area article aside async audio autocomplete autofocus autoplay axis b " @@ -2656,7 +2655,7 @@ int iDefaultLexer; BOOL bAutoSelect; int cxStyleSelectDlg; int cyStyleSelectDlg; -extern int iInternalCharSet; +extern int iDefaultCharSet; extern BOOL bHiliteCurrentLine; @@ -4004,7 +4003,7 @@ BOOL Style_SelectFont(HWND hwnd,LPWSTR lpszStyle,int cchStyle,BOOL bDefaultStyle if (bDefaultStyle && lf.lfCharSet != DEFAULT_CHARSET && lf.lfCharSet != ANSI_CHARSET && - lf.lfCharSet != iInternalCharSet) { + lf.lfCharSet != iDefaultCharSet) { 
lstrcat(szNewStyle,L"; charset:"); wsprintf(tch,L"%i",lf.lfCharSet); lstrcat(szNewStyle,tch);