锘??xml version="1.0" encoding="utf-8" standalone="yes"?> 涓 C++ 涓?string涓巜string浜掕漿 鏂規硶涓錛?/p>
string WideToMutilByte(const wstring& _src) char *szBuf = new char[nBufSize]; WideCharToMultiByte(GetACP(), 0, _src.c_str(),-1, szBuf, nBufSize, 0, FALSE); string strRet(szBuf); delete []szBuf; return strRet; wstring MutilByteToWide(const string& _src) //涓?wsbuf 鍒嗛厤鍐呭瓨 BufSize 涓瓧鑺?br>wchar_t *wsBuf = new wchar_t[nBufSize]; //杞寲涓?unicode 鐨?WideString wstring wstrRet(wsBuf); delete []wsBuf; return wstrRet; 榪欑瘒鏂囩珷閲岋紝鎴戝皢緇欏嚭鍑犵C++ std::string鍜宻td::wstring鐩鎬簰杞崲鐨勮漿鎹㈡柟娉曘?br> #include <string> 鍙互鍙傝僓nicode.org 涓婃湁ConvertUTF.c鍜孋onvertUTF.h 錛堜笅杞藉湴鍧錛?a >http://www.unicode.org/Public/PROGRAMS/CVTUTF/錛?/p>
瀹炵幇鏂囦歡ConvertUTF.c錛氾紙.h鐪侊級 /**//* --------------------------------------------------------------------- Conversions between UTF32, UTF-16, and UTF-8. Source code file. See the header file "ConvertUTF.h" for complete documentation. ------------------------------------------------------------------------ */ static const int halfShift = 10; /**//* used for shifting by 10 bits */ static const UTF32 halfBase = 0x0010000UL; #define UNI_SUR_HIGH_START (UTF32)0xD800 /**//* --------------------------------------------------------------------- */ ConversionResult ConvertUTF32toUTF16 ( /**//* --------------------------------------------------------------------- */ ConversionResult ConvertUTF16toUTF32 ( /**//* --------------------------------------------------------------------- */ /**//* /**//* /**//* /**//* --------------------------------------------------------------------- */ /**//* The interface converts a whole buffer to avoid function-call overhead. /**//* --------------------------------------------------------------------- */ ConversionResult ConvertUTF16toUTF8 ( target += bytesToWrite; /**//* --------------------------------------------------------------------- */ /**//* static Boolean isLegalUTF8(const UTF8 *source, int length) { switch (*source) { case 1: if (*source >= 0x80 && *source < 0xC2) return false; /**//* --------------------------------------------------------------------- */ /**//* /**//* --------------------------------------------------------------------- */ ConversionResult ConvertUTF8toUTF16 ( if (target >= targetEnd) { /**//* --------------------------------------------------------------------- */ ConversionResult ConvertUTF32toUTF8 ( /**//* --------------------------------------------------------------------- */ ConversionResult ConvertUTF8toUTF32 ( if (target >= targetEnd) { /**//* --------------------------------------------------------------------- Note A. 
--------------------------------------------------------------------- */ 涓?C++ 鐨勫瓧絎︿覆涓嶤#鐨勮漿鍖?/p>
1錛夊皢system::String 杞寲涓篊++鐨剆tring錛?br>// convert_system_string.cpp void MarshalString ( String ^ s, string& os ) { void MarshalString ( String ^ s, wstring& os ) { int main() { cout << a << endl; using namespace System; int main() { // Pin memory so GC can't move it while native function is called // Conversion to char* : err = wcstombs_s(&convertedChars, printf_s("%s\n", ch);
]]>
]]>
]]>
{
int nBufSize = WideCharToMultiByte(GetACP(), 0, _src.c_str(),-1, NULL, 0, 0, FALSE);
szBuf = NULL;
}
{
//璁$畻瀛楃涓?string 杞垚 wchar_t 涔嬪悗鍗犵敤鐨勫唴瀛樺瓧鑺傛暟
int nBufSize = MultiByteToWideChar(GetACP(),0,_src.c_str(),-1,NULL,0);
MultiByteToWideChar(GetACP(),0,_src.c_str(),-1,wsBuf,nBufSize);
wsBuf = NULL;
}
转载：csdn
第一种方法：调用WideCharToMultiByte()和MultiByteToWideChar()，代码如下（关于详细的解释，可以参考《windows核心编程》）：<br>
#include <windows.h>
using namespace std;
// Converts a NUL-terminated wide string to the system ANSI code page (CP_ACP).
//
// pwszSrc: NUL-terminated wide string; may be NULL.
// Returns the converted bytes without the terminating NUL, or an empty string
// when pwszSrc is NULL or either WideCharToMultiByte call fails.
std::string WChar2Ansi(LPCWSTR pwszSrc)
{
    if (NULL == pwszSrc) return std::string();

    // Sizing pass: with cchWideChar == -1 the returned byte count includes the
    // terminating NUL.
    const int nLen = WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, NULL, 0, NULL, NULL);
    if (nLen <= 0) return std::string();

    // std::string owns the scratch storage, so nothing leaks if a later step
    // throws (the old new[]/delete[] pair leaked in that case, and its NULL
    // check was dead code because new[] throws instead of returning NULL).
    std::string strDst(static_cast<std::string::size_type>(nLen), '\0');
    const int nWritten =
        WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, &strDst[0], nLen, NULL, NULL);
    if (nWritten <= 0) return std::string();   // do not hand back an unconverted buffer

    // Keep only the text in front of the terminator the API wrote.
    const std::string::size_type nul = strDst.find('\0');
    if (nul != std::string::npos) strDst.resize(nul);
    return strDst;
}
// Convenience wrapper: narrows a std::wstring by handing its C string to WChar2Ansi().
string ws2s(wstring& inputws)
{
    const wchar_t* wideText = inputws.c_str();
    return WChar2Ansi(wideText);
}
// Converts an ANSI (CP_ACP) byte string to a wide string.
//
// pszSrc: source bytes; may be NULL.
// nLen:   number of bytes to convert, or -1 when pszSrc is NUL-terminated.
// Returns the converted text with one leading U+FEFF (byte-order mark) removed
// and cut at the first NUL. Returns an empty string when pszSrc is NULL, nLen
// is 0, or the conversion fails. The previous code did `return NULL`, which
// constructs std::wstring from a null pointer (undefined behaviour), and
// released its new[] buffer with scalar delete.
std::wstring Ansi2WChar(LPCSTR pszSrc, int nLen)
{
    if (NULL == pszSrc) return std::wstring();

    // Sizing pass: number of wide characters required for the conversion.
    const int nSize = MultiByteToWideChar(CP_ACP, 0, pszSrc, nLen, NULL, 0);
    if (nSize <= 0) return std::wstring();

    // std::wstring owns the scratch storage; no manual new[]/delete[] to mismatch.
    std::wstring wstrDst(static_cast<std::wstring::size_type>(nSize), L'\0');
    const int nWritten = MultiByteToWideChar(CP_ACP, 0, pszSrc, nLen, &wstrDst[0], nSize);
    if (nWritten <= 0) return std::wstring();
    wstrDst.resize(static_cast<std::wstring::size_type>(nWritten));

    // Skip a leading byte-order mark (0xFEFF), as the original shift loop did.
    if (!wstrDst.empty() && wstrDst[0] == 0xFEFF)
        wstrDst.erase(0, 1);

    // The result has always been built from a C string, so stop at the first NUL
    // (with nLen == -1 the API also converts the terminator).
    const std::wstring::size_type nul = wstrDst.find(L'\0');
    if (nul != std::wstring::npos) wstrDst.resize(nul);
    return wstrDst;
}
// Convenience wrapper: widens a std::string by passing its bytes and byte count
// to Ansi2WChar().
std::wstring s2ws(const string& s)
{
    const char* narrowText = s.c_str();
    const int byteCount = static_cast<int>(s.size());
    return Ansi2WChar(narrowText, byteCount);
}
第二种方法：采用ATL封装_bstr_t的过渡：（注，_bstr_t是Microsoft Specific的，所以下面代码可以在VS2005通过，无移植性）：</p>
#include <string>
#include <comutil.h>
using namespace std;
#pragma comment(lib, "comsuppw.lib")
string ws2s(const wstring& ws);
wstring s2ws(const string& s);
string ws2s(const wstring& ws)
{
_bstr_t t = ws.c_str();
char* pchar = (char*)t;
string result = pchar;
return result;
}
wstring s2ws(const string& s)
{
_bstr_t t = s.c_str();
wchar_t* pwchar = (wchar_t*)t;
wstring result = pwchar;
return result;
}
第三种方法：使用CRT库的mbstowcs()函数和wcstombs()函数，平台无关，需设定locale。</p>
#include <locale.h>
#include <stdlib.h>
#include <string>
using namespace std;
// Converts a wide string to a multibyte string with the CRT's wcstombs().
//
// The "chs" locale is requested for the duration of the call and the caller's
// locale is put back before returning. If "chs" is not available, setlocale()
// leaves the current locale in place and the conversion runs under it.
//
// Returns the bytes produced up to the first NUL. Conversion stops at the first
// wide NUL in ws or at the first character the locale cannot represent.
string ws2s(const wstring& ws)
{
    // setlocale(LC_ALL, NULL) only queries; guard against a null answer before
    // building a std::string from it.
    const char* active = setlocale(LC_ALL, NULL);
    const string savedLocale = (active != NULL) ? active : "C";
    setlocale(LC_ALL, "chs");

    // Size for the worst case of the locale now in effect. The former
    // 2 * size + 1 buffer was too small whenever one character needs more than
    // two bytes (e.g. a UTF-8 locale); wcstombs then filled it without a
    // terminator and the result was read past the end of the allocation.
    const size_t capacity = ws.size() * static_cast<size_t>(MB_CUR_MAX) + 1;
    string buffer(capacity, '\0');          // zero-filled, released automatically
    wcstombs(&buffer[0], ws.c_str(), capacity);

    setlocale(LC_ALL, savedLocale.c_str());
    return string(buffer.c_str());          // keep only the text before the first NUL
}
// Converts a multibyte string to a wide string with the CRT's mbstowcs().
//
// The "chs" locale is requested for the duration of the call. The caller's
// locale is then restored; previously the function always left the process in
// the "C" locale, silently discarding whatever locale the program had selected.
// If "chs" is not available, setlocale() leaves the current locale in place and
// the conversion runs under it.
//
// Returns the wide characters produced up to the first NUL. Conversion stops at
// the first NUL byte in s or at the first invalid multibyte sequence.
wstring s2ws(const string& s)
{
    // setlocale(LC_ALL, NULL) only queries; guard against a null answer before
    // building a std::string from it.
    const char* active = setlocale(LC_ALL, NULL);
    const string savedLocale = (active != NULL) ? active : "C";
    setlocale(LC_ALL, "chs");

    // Each wide character consumes at least one source byte, so size() + 1
    // elements always leave room for the terminator.
    const size_t capacity = s.size() + 1;
    wstring buffer(capacity, L'\0');        // zero-filled, released automatically
    mbstowcs(&buffer[0], s.c_str(), capacity);

    setlocale(LC_ALL, savedLocale.c_str());
    return wstring(buffer.c_str());         // keep only the text before the first NUL
}
浜?utf8.utf16.utf32鐨勭浉浜掕漿鍖?/p>
/**//*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
Author: Mark E. Davis, 1994.
Rev History: Rick McGowan, fixes & updates May 2001.
Sept 2001: fixed const & error conditions per
mods suggested by S. Parent & A. Lillich.
June 2002: Tim Dodd added detection and handling of incomplete
source sequences, enhanced error detection, added casts
to eliminate compiler warnings.
July 2003: slight mods to back out aggressive FFFE detection.
Jan 2004: updated switches in from-UTF8 conversions.
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
#include "ConvertUTF.h"
#ifdef CVTUTF_DEBUG
#include <stdio.h>
#endif
static const UTF32 halfMask = 0x3FFUL;
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
#define false 0
#define true 1
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
if (target >= targetEnd) {
result = targetExhausted; break;
}
ch = *source++;
if (ch <= UNI_MAX_BMP) { /**//* Target is a character <= 0xFFFF */
/**//* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
--source; /**//* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = (UTF16)ch; /**//* normal case */
}
} else if (ch > UNI_MAX_LEGAL_UTF32) {
if (flags == strictConversion) {
result = sourceIllegal;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/**//* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) {
--source; /**//* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
UTF32* target = *targetStart;
UTF32 ch, ch2;
while (source < sourceEnd) {
const UTF16* oldSource = source; /**//* In case we have to back up because of target overflow. */
ch = *source++;
/**//* If we have a surrogate pair, convert to UTF32 first. */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
/**//* If the 16 bits following the high surrogate are in the source buffer */
if (source < sourceEnd) {
ch2 = *source;
/**//* If it's a low surrogate, convert to UTF32. */
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
++source;
} else if (flags == strictConversion) { /**//* it's an unpaired high surrogate */
--source; /**//* return to the illegal value itself */
result = sourceIllegal;
break;
}
} else { /**//* We don't have the 16 bits following the high surrogate. */
--source; /**//* return to the high surrogate */
result = sourceExhausted;
break;
}
} else if (flags == strictConversion) {
/**//* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /**//* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
if (target >= targetEnd) {
source = oldSource; /**//* Back up source pointer! */
result = targetExhausted; break;
}
*target++ = ch;
}
*sourceStart = source;
*targetStart = target;
#ifdef CVTUTF_DEBUG
if (result == sourceIllegal) {
fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
fflush(stderr);
}
#endif
return result;
}
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
* left as-is for anyone who may want to do such conversion, which was
* allowed in earlier algorithms.
*/
/* Keyed by a sequence's first byte (0x00-0xFF); each row below covers 32 byte values. */
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x00-0x1F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x20-0x3F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40-0x5F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60-0x7F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x9F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xBF */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xC0-0xDF */
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5  /* 0xE0-0xEF: 2, 0xF0-0xF7: 3, 0xF8-0xFB: 4, 0xFC-0xFF: 5 */
};
/*
* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
/*
 * Indexed by the trailing-byte count of a sequence; the decoders apply it as
 * "ch -= offsetsFromUTF8[extraBytesToRead]" after summing and shifting the raw
 * bytes. Each entry equals the marker bits those raw bytes contributed, e.g.
 * entry 1 is (0xC0 << 6) + 0x80 = 0x3080 and entry 2 is
 * (0xE0 << 12) + (0x80 << 6) + 0x80 = 0xE2080.
 */
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte etc.). Remember that sequencs
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
/*
 * Indexed by the total number of bytes in the sequence being written
 * ("firstByteMark[bytesToWrite]" in the encoders); index 0 is a placeholder and
 * index 1 adds no marker bits to a one-byte sequence.
 */
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
* Constants have been gathered. Loops & conditionals have been removed as
* much as possible for efficiency, in favor of drop-through switches.
* (See "Note A" at the bottom of the file for equivalent code.)
* If your compiler supports it, the "isLegalUTF8" call can be turned
* into an inline function.
*/
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
const UTF32 byteMark = 0x80;
const UTF16* oldSource = source; /**//* In case we have to back up because of target overflow. */
ch = *source++;
/**//* If we have a surrogate pair, convert to UTF32 first. */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
/**//* If the 16 bits following the high surrogate are in the source buffer */
if (source < sourceEnd) {
UTF32 ch2 = *source;
/**//* If it's a low surrogate, convert to UTF32. */
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
++source;
} else if (flags == strictConversion) { /**//* it's an unpaired high surrogate */
--source; /**//* return to the illegal value itself */
result = sourceIllegal;
break;
}
} else { /**//* We don't have the 16 bits following the high surrogate. */
--source; /**//* return to the high surrogate */
result = sourceExhausted;
break;
}
} else if (flags == strictConversion) {
/**//* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /**//* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
/**//* Figure out how many bytes the result will require */
if (ch < (UTF32)0x80) { bytesToWrite = 1;
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
} else { bytesToWrite = 3;
ch = UNI_REPLACEMENT_CHAR;
}
if (target > targetEnd) {
source = oldSource; /**//* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /**//* note: everything falls through. */
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
}
target += bytesToWrite;
}
*sourceStart = source;
*targetStart = target;
return result;
}
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
* If not calling this from ConvertUTF8to*, then the length can be set by:
* length = trailingBytesForUTF8[*source]+1;
* and the sequence is illegal right away if there aren't that many bytes
* available.
* If presented with a length > 4, this returns false. The Unicode
* definition of UTF-8 goes up to 4-byte sequences.
*/
UTF8 a;
const UTF8 *srcptr = source+length;
switch (length) {
default: return false;
/**//* Everything else falls through when "true" */
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 2: if ((a = (*--srcptr)) > 0xBF) return false;
/**//* no fall-through in this inner switch */
case 0xE0: if (a < 0xA0) return false; break;
case 0xED: if (a > 0x9F) return false; break;
case 0xF0: if (a < 0x90) return false; break;
case 0xF4: if (a > 0x8F) return false; break;
default: if (a < 0x80) return false;
}
}
if (*source > 0xF4) return false;
return true;
}
* Exported function to return whether a UTF-8 sequence is legal or not.
* This is not used here; it's just exported.
*/
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
int length = trailingBytesForUTF8[*source]+1;
if (source+length > sourceEnd) {
return false;
}
return isLegalUTF8(source, length);
}
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
if (source + extraBytesToRead >= sourceEnd) {
result = sourceExhausted; break;
}
/**//* Do this check whether lenient or strict */
if (! isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
break;
}
/**//*
* The cases all fall through. See "Note A" below.
*/
switch (extraBytesToRead) {
case 5: ch += *source++; ch <<= 6; /**//* remember, illegal UTF-8 */
case 4: ch += *source++; ch <<= 6; /**//* remember, illegal UTF-8 */
case 3: ch += *source++; ch <<= 6;
case 2: ch += *source++; ch <<= 6;
case 1: ch += *source++; ch <<= 6;
case 0: ch += *source++;
}
ch -= offsetsFromUTF8[extraBytesToRead];
source -= (extraBytesToRead+1); /**//* Back up source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_BMP) { /**//* Target is a character <= 0xFFFF */
/**//* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
source -= (extraBytesToRead+1); /**//* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = (UTF16)ch; /**//* normal case */
}
} else if (ch > UNI_MAX_UTF16) {
if (flags == strictConversion) {
result = sourceIllegal;
source -= (extraBytesToRead+1); /**//* return to the start */
break; /**//* Bail out; shouldn't continue */
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/**//* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) {
source -= (extraBytesToRead+1); /**//* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
const UTF32 byteMark = 0x80;
ch = *source++;
if (flags == strictConversion ) {
/**//* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
--source; /**//* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
/**//*
* Figure out how many bytes the result will require. Turn any
* illegally large UTF32 things (> Plane 17) into replacement chars.
*/
if (ch < (UTF32)0x80) { bytesToWrite = 1;
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
} else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
} else { bytesToWrite = 3;
ch = UNI_REPLACEMENT_CHAR;
result = sourceIllegal;
}
target += bytesToWrite;
if (target > targetEnd) {
--source; /**//* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /**//* note: everything falls through. */
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
}
target += bytesToWrite;
}
*sourceStart = source;
*targetStart = target;
return result;
}
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF32* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
if (source + extraBytesToRead >= sourceEnd) {
result = sourceExhausted; break;
}
/**//* Do this check whether lenient or strict */
if (! isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
break;
}
/**//*
* The cases all fall through. See "Note A" below.
*/
switch (extraBytesToRead) {
case 5: ch += *source++; ch <<= 6;
case 4: ch += *source++; ch <<= 6;
case 3: ch += *source++; ch <<= 6;
case 2: ch += *source++; ch <<= 6;
case 1: ch += *source++; ch <<= 6;
case 0: ch += *source++;
}
ch -= offsetsFromUTF8[extraBytesToRead];
source -= (extraBytesToRead+1); /**//* Back up the source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_LEGAL_UTF32) {
/**//*
* UTF-16 surrogate values are illegal in UTF-32, and anything
* over Plane 17 (> 0x10FFFF) is illegal.
*/
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
source -= (extraBytesToRead+1); /**//* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = ch;
}
} else { /**//* i.e., ch > UNI_MAX_LEGAL_UTF32 */
result = sourceIllegal;
*target++ = UNI_REPLACEMENT_CHAR;
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
The fall-through switches in UTF-8 reading code save a
temp variable, some decrements & conditionals. The switches
are equivalent to the following loop:
{
int tmpBytesToRead = extraBytesToRead+1;
do {
ch += *source++;
--tmpBytesToRead;
if (tmpBytesToRead) ch <<= 6;
} while (tmpBytesToRead > 0);
}
In UTF-8 writing code, the switches on "bytesToWrite" are
similarly unrolled loops.
// compile with: /clr
#include <string>
#include <iostream>
using namespace std;
using namespace System;
using namespace Runtime::InteropServices;
const char* chars =
(const char*)(Marshal::StringToHGlobalAnsi(s)).ToPointer();
os = chars;
Marshal::FreeHGlobal(IntPtr((void*)chars));
}
using namespace Runtime::InteropServices;
const wchar_t* chars =
(const wchar_t*)(Marshal::StringToHGlobalUni(s)).ToPointer();
os = chars;
Marshal::FreeHGlobal(IntPtr((void*)chars));
}
string a = "test";
wstring b = L"test2";
String ^ c = gcnew String("abcd");
MarshalString(c, a);
c = "efgh";
MarshalString(c, b);
cout << a << endl;
wcout << b << endl;
}
2錛夊皢System::String杞寲涓篶har*鎴杦_char*
// convert_string_to_wchar.cpp
// compile with: /clr
#include < stdio.h >
#include < stdlib.h >
#include < vcclr.h >
String ^str = "Hello";
pin_ptr<const wchar_t> wch = PtrToStringChars(str);
printf_s("%S\n", wch);
// Can just convert wchar_t* to char* using one of the
// conversion functions such as:
// WideCharToMultiByte()
// wcstombs_s()
// etc
size_t convertedChars = 0;
size_t sizeInBytes = ((str->Length + 1) * 2);
errno_t err = 0;
char *ch = (char *)malloc(sizeInBytes);
ch, sizeInBytes,
wch, sizeInBytes);
if (err != 0)
printf_s("wcstombs_s failed!\n");
}
]]>
浣滆咃細杞澆 杞創鑷細杞澆 鐐瑰嚮鏁幫細827 鏂囩珷褰曞叆錛?zhaizl
渚嬪瀛楃"姹?鐨剈nicode鏄?C49錛屾妸榪欎釜unicode瀛楃琛ㄧず涓轟竴涓ぇ鏁存暟錛岀劧鍚庤漿鍙樻垚澶氬瓧鑺傜紪鐮?10110001001001錛?br>
瑙傚療榪欎釜鏁存暟鐨勪簩榪涘埗鐮佸簭鍒楋紙110錛?10001錛?01001錛?br> 浠庡悗寰鍓嶅彇
濡傛灉榪欎釜浜岃繘鍒跺簭鍒楀彧鏈夊悗7浣嶏紙灝忎簬128,涔熷氨鏄痑scii瀛楃錛夊垯鐩存帴鍙栧悗7浣嶄簩榪涘埗鏁板艦鎴愪竴涓猽tf8瀛楃銆?br>
涓婇潰鐨勫瓧絎?#8220;姹?#8221;浜岃繘鍒跺簭鍒楀ぇ浜?浣嶏紝鎵浠ュ彇鍚?浣?1001001)錛屽姞10褰㈡垚涓涓猽tf8瀛楄妭錛?0 001001 ,16榪涘埗89錛夈?br>
鍓╀笅鐨勪簩榪涘埗搴忓垪錛?10錛?10001錛変粠鍚庡悜鍓嶅彇6浣嶏紝鍔?0褰㈡垚涓涓猽tf8瀛楄妭錛?0 110001錛?6榪涘埗B1錛夈?br>
鍓╀笅鐨勪簩榪涘埗搴忓垪錛?10錛変粠鍚庡悜鍓嶅彇6浣嶏紝鐢變簬涓嶈凍6浣嶏紝灝嗚繖涓暟鍜?110000鐩告垨錛屽緱鍒板瓧絎?1100110錛?6榪涘埗E6
鏈鍚庯紝灝卞緱鍒頒簡utf8緙栫爜錛?6榪涘埗琛ㄧず涓篍6B189
|
鍦ㄧ綉緇滀腑鏈夊緢澶氬湴鏂歸兘鏈夐噰鐢║TF8緙栫爜錛岀敱浜庤緙栧啓涓庨偖浠舵湇鍔$鏈夊叧鐨勭▼搴忥紝鑰岄偖浠舵湇鍔$鏈変簺鍦版柟鐢ㄥ埌浜哢TF8緙栫爜錛屾墍浠ュ瀹冩湁浜嗗垵姝ョ殑璁よ瘑錛?br>
|
[鍘熷垱鏂囩珷錛岃漿杞借淇濈暀鎴栨敞鏄庡嚭澶勶細http://www.regexlab.com/zh/encoding.htm]
綰у埆錛氫腑綰?/p>
鎽樿錛氭湰鏂囦粙緇嶄簡瀛楃涓庣紪鐮佺殑鍙戝睍榪囩▼錛岀浉鍏蟲蹇電殑姝g‘鐞嗚В銆備婦渚嬭鏄庝簡涓浜涘疄闄呭簲鐢ㄤ腑錛岀紪鐮佺殑瀹炵幇鏂規硶銆傜劧鍚庯紝鏈枃璁茶堪浜嗛氬父瀵瑰瓧絎︿笌緙栫爜鐨勫嚑縐嶈瑙o紝鐢變簬榪欎簺璇В鑰屽鑷翠貢鐮佷駭鐢熺殑鍘熷洜錛屼互鍙婃秷闄や貢鐮佺殑鍔炴硶銆傛湰鏂囩殑鍐呭娑電洊浜?#8220;涓枃闂”錛?#8220;涔辯爜闂”銆?/p>
鎺屾彙緙栫爜闂鐨勫叧閿槸姝g‘鍦扮悊瑙g浉鍏蟲蹇碉紝緙栫爜鎵娑夊強鐨勬妧鏈叾瀹炴槸寰堢畝鍗曠殑銆傚洜姝わ紝闃呰鏈枃鏃墮渶瑕佹參璇誨鎯籌紝澶氭濊冦?/p>
“瀛楃涓庣紪鐮?#8221;鏄竴涓緇忓父璁ㄨ鐨勮瘽棰樸傚嵆浣胯繖鏍鳳紝鏃跺父鍑虹幇鐨勪貢鐮佷粛鐒跺洶鎵扮潃澶у銆傝櫧鐒舵垜浠湁寰堝鐨勫姙娉曞彲浠ョ敤鏉ユ秷闄や貢鐮侊紝浣嗘垜浠茍涓嶄竴瀹氱悊瑙h繖浜涘姙娉曠殑鍐呭湪鍘熺悊銆傝屾湁鐨勪貢鐮佷駭鐢熺殑鍘熷洜錛屽疄闄呬笂鐢變簬搴曞眰浠g爜鏈韓鏈夐棶棰樻墍瀵艱嚧鐨勩傚洜姝わ紝涓嶄粎鏄垵瀛﹁呬細瀵瑰瓧絎︾紪鐮佹劅鍒版ā緋婏紝鏈夌殑搴曞眰寮鍙戜漢鍛樺悓鏍峰瀛楃緙栫爜緙轟箯鍑嗙‘鐨勭悊瑙c?/p>
|
浠庤綆楁満瀵瑰鍥借璦鐨勬敮鎸佽搴︾湅錛屽ぇ鑷村彲浠ュ垎涓轟笁涓樁孌碉細
| 銆 | 緋葷粺鍐呯爜 | 璇存槑 | 緋葷粺 |
| 闃舵涓 | ASCII | 璁$畻鏈哄垰寮濮嬪彧鏀寔鑻辮錛屽叾瀹冭璦涓嶈兘澶熷湪璁$畻鏈轟笂瀛樺偍鍜屾樉紺恒?/td> | 鑻辨枃 DOS |
| 闃舵浜?/td> | ANSI緙栫爜 錛堟湰鍦板寲錛?/td> | 涓轟嬌璁$畻鏈烘敮鎸佹洿澶氳璦錛岄氬父浣跨敤 0x80~0xFF 鑼冨洿鐨?2 涓瓧鑺傛潵琛ㄧず 1 涓瓧絎︺傛瘮濡傦細姹夊瓧 '涓? 鍦ㄤ腑鏂囨搷浣滅郴緇熶腑錛屼嬌鐢?[0xD6,0xD0] 榪欎袱涓瓧鑺傚瓨鍌ㄣ?br> 涓嶅悓鐨勫浗瀹跺拰鍦板尯鍒跺畾浜嗕笉鍚岀殑鏍囧噯錛岀敱姝や駭鐢熶簡 GB2312, BIG5, JIS 絳夊悇鑷殑緙栫爜鏍囧噯銆傝繖浜涗嬌鐢?2 涓瓧鑺傛潵浠h〃涓涓瓧絎︾殑鍚勭姹夊瓧寤朵幾緙栫爜鏂瑰紡錛岀О涓?strong> ANSI 緙栫爜銆傚湪綆浣撲腑鏂囩郴緇熶笅錛孉NSI 緙栫爜浠h〃 GB2312 緙栫爜錛屽湪鏃ユ枃鎿嶄綔緋葷粺涓嬶紝ANSI 緙栫爜浠h〃 JIS 緙栫爜銆?br> 涓嶅悓 ANSI 緙栫爜涔嬮棿浜掍笉鍏煎錛屽綋淇℃伅鍦ㄥ浗闄呴棿浜ゆ祦鏃訛紝鏃犳硶灝嗗睘浜庝袱縐嶈璦鐨勬枃瀛楋紝瀛樺偍鍦ㄥ悓涓孌?strong> ANSI 緙栫爜鐨勬枃鏈腑銆?/td> | 涓枃 DOS錛屼腑鏂?Windows 95/98錛屾棩鏂?Windows 95/98 |
| 闃舵涓?/td> | UNICODE 錛堝浗闄呭寲錛?/td> | 涓轟簡浣垮浗闄呴棿淇℃伅浜ゆ祦鏇村姞鏂逛究錛屽浗闄呯粍緇囧埗瀹氫簡 UNICODE 瀛楃闆?/strong>錛屼負鍚勭璇█涓殑姣忎竴涓瓧絎﹁瀹氫簡緇熶竴騫朵笖鍞竴鐨勬暟瀛楃紪鍙鳳紝浠ユ弧瓚寵法璇█銆佽法騫沖彴榪涜鏂囨湰杞崲銆佸鐞嗙殑瑕佹眰銆?/td>
| Windows NT/2000/XP錛孡inux錛孞ava |
|
瀛楃涓插湪鍐呭瓨涓殑瀛樻斁鏂規硶錛?/p>
鍦?ASCII 闃舵錛?strong>鍗曞瓧鑺傚瓧絎︿覆浣跨敤涓涓瓧鑺傚瓨鏀句竴涓瓧絎︼紙SBCS錛夈傛瘮濡傦紝"Bob123" 鍦ㄥ唴瀛樹腑涓猴細
| 42 | 6F | 62 | 31 | 32 | 33 | 00 |
| B | o | b | 1 | 2 | 3 | \0 |
鍦ㄤ嬌鐢?ANSI 緙栫爜鏀寔澶氱璇█闃舵錛屾瘡涓瓧絎︿嬌鐢ㄤ竴涓瓧鑺傛垨澶氫釜瀛楄妭鏉ヨ〃紺猴紙MBCS錛夛紝鍥犳錛岃繖縐嶆柟寮忓瓨鏀劇殑瀛楃涔熻縐頒綔澶氬瓧鑺傚瓧絎?/strong>銆傛瘮濡傦紝"涓枃123" 鍦ㄤ腑鏂?Windows 95 鍐呭瓨涓負7涓瓧鑺傦紝姣忎釜姹夊瓧鍗?涓瓧鑺傦紝姣忎釜鑻辨枃鍜屾暟瀛楀瓧絎﹀崰1涓瓧鑺傦細
| D6 | D0 | CE | C4 | 31 | 32 | 33 | 00 |
| 涓?/td> | 鏂?/td> | 1 | 2 | 3 | \0 | ||
鍦?UNICODE 琚噰鐢ㄤ箣鍚庯紝璁$畻鏈哄瓨鏀懼瓧絎︿覆鏃訛紝鏀逛負瀛樻斁姣忎釜瀛楃鍦?UNICODE 瀛楃闆嗕腑鐨勫簭鍙楓傜洰鍓嶈綆楁満涓鑸嬌鐢?2 涓瓧鑺傦紙16 浣嶏級鏉ュ瓨鏀句竴涓簭鍙鳳紙DBCS錛夛紝鍥犳錛岃繖縐嶆柟寮忓瓨鏀劇殑瀛楃涔熻縐頒綔瀹藉瓧鑺傚瓧絎?/strong>銆傛瘮濡傦紝瀛楃涓?"涓枃123" 鍦?Windows 2000 涓嬶紝鍐呭瓨涓疄闄呭瓨鏀劇殑鏄?5 涓簭鍙鳳細
| 2D | 4E | 87 | 65 | 31 | 00 | 32 | 00 | 33 | 00 | 00 | 00 | ← 鍦?x86 CPU 涓紝浣庡瓧鑺傚湪鍓?/font> |
| 涓?/td> | 鏂?/td> | 1 | 2 | 3 | \0 | 銆 | ||||||
涓鍏卞崰 10 涓瓧鑺傘?/p>
|
鐞嗚В緙栫爜鐨勫叧閿紝鏄鎶婂瓧絎︾殑姒傚康鍜屽瓧鑺傜殑姒傚康鐞嗚В鍑嗙‘銆傝繖涓や釜姒傚康瀹規槗娣鋒穯錛屾垜浠湪姝ゅ仛涓涓嬪尯鍒嗭細
| 銆 | 姒傚康鎻忚堪 | 涓句緥 |
| 瀛楃 | 浜轟滑浣跨敤鐨勮鍙鳳紝鎶借薄鎰忎箟涓婄殑涓涓鍙楓?/td> | '1', '涓?, 'a', '$', '錕?, …… |
| 瀛楄妭 | 璁$畻鏈轟腑瀛樺偍鏁版嵁鐨勫崟鍏冿紝涓涓?浣嶇殑浜岃繘鍒舵暟錛屾槸涓涓緢鍏蜂綋鐨勫瓨鍌ㄧ┖闂淬?/td> | 0x01, 0x45, 0xFA, …… |
| ANSI 瀛楃涓?/td> | 鍦ㄥ唴瀛樹腑錛屽鏋?#8220;瀛楃”鏄互 ANSI 緙栫爜褰㈠紡瀛樺湪鐨勶紝涓涓瓧絎﹀彲鑳戒嬌鐢ㄤ竴涓瓧鑺傛垨澶氫釜瀛楄妭鏉ヨ〃紺猴紝閭d箞鎴戜滑縐拌繖縐嶅瓧絎︿覆涓?ANSI 瀛楃涓?/strong>鎴栬?strong>澶氬瓧鑺傚瓧絎︿覆銆?/td> | "涓枃123" 錛堝崰7瀛楄妭錛?/font> |
| UNICODE 瀛楃涓?/td> | 鍦ㄥ唴瀛樹腑錛屽鏋?#8220;瀛楃”鏄互鍦?UNICODE 涓殑搴忓彿瀛樺湪鐨勶紝閭d箞鎴戜滑縐拌繖縐嶅瓧絎︿覆涓?UNICODE 瀛楃涓?/strong>鎴栬?strong>瀹藉瓧鑺傚瓧絎︿覆銆?/td> | L"涓枃123" 錛堝崰10瀛楄妭錛?/font> |
鐢變簬涓嶅悓 ANSI 緙栫爜鎵瑙勫畾鐨勬爣鍑嗘槸涓嶇浉鍚岀殑錛屽洜姝わ紝瀵逛簬涓涓粰瀹氱殑澶氬瓧鑺傚瓧絎︿覆錛屾垜浠繀欏葷煡閬撳畠閲囩敤鐨勬槸鍝竴縐嶇紪鐮佽鍒欙紝鎵嶈兘澶熺煡閬撳畠鍖呭惈浜嗗摢浜?#8220;瀛楃”銆傝屽浜?UNICODE 瀛楃涓?/strong>鏉ヨ錛屼笉綆″湪浠涔堢幆澧冧笅錛屽畠鎵浠h〃鐨?#8220;瀛楃”鍐呭鎬繪槸涓嶅彉鐨勩?/p>
鍚勪釜鍥藉鍜屽湴鍖烘墍鍒跺畾鐨勪笉鍚?ANSI 緙栫爜鏍囧噯涓紝閮藉彧瑙勫畾浜嗗悇鑷璦鎵闇鐨?#8220;瀛楃”銆傛瘮濡傦細姹夊瓧鏍囧噯錛圙B2312錛変腑娌℃湁瑙勫畾闊╁浗璇瓧絎︽庢牱瀛樺偍銆傝繖浜?ANSI 緙栫爜鏍囧噯鎵瑙勫畾鐨勫唴瀹瑰寘鍚袱灞傚惈涔夛細 鍚勪釜鍥藉鍜屽湴鍖哄湪鍒跺畾緙栫爜鏍囧噯鐨勬椂鍊欙紝“瀛楃鐨勯泦鍚?#8221;鍜?#8220;緙栫爜”涓鑸兘鏄悓鏃跺埗瀹氱殑銆傚洜姝わ紝騫沖父鎴戜滑鎵璇寸殑“瀛楃闆?#8221;錛屾瘮濡傦細GB2312, GBK, JIS 絳夛紝闄や簡鏈?#8220;瀛楃鐨勯泦鍚?#8221;榪欏眰鍚箟澶栵紝鍚屾椂涔熷寘鍚簡“緙栫爜”鐨勫惈涔夈?/p>
“UNICODE 瀛楃闆?/strong>”鍖呭惈浜嗗悇縐嶈璦涓嬌鐢ㄥ埌鐨勬墍鏈?#8220;瀛楃”銆傜敤鏉ョ粰 UNICODE 瀛楃闆嗙紪鐮佺殑鏍囧噯鏈夊緢澶氱錛屾瘮濡傦細UTF-8, UTF-7, UTF-16, UnicodeLittle, UnicodeBig 絳夈?/p>
綆鍗曚粙緇嶄竴涓嬪父鐢ㄧ殑緙栫爜瑙勫垯錛屼負鍚庤竟鐨勭珷鑺傚仛涓涓噯澶囥傚湪榪欓噷錛屾垜浠牴鎹紪鐮佽鍒欑殑鐗圭偣錛屾妸鎵鏈夌殑緙栫爜鍒嗘垚涓夌被錛?/p>
鎴戜滑瀹為檯涓婃病鏈夊繀瑕佸幓娣辯┒姣忎竴縐嶇紪鐮佸叿浣撴妸鏌愪竴涓瓧絎︾紪鐮佹垚浜嗗摢鍑犱釜瀛楄妭錛屾垜浠彧闇瑕佺煡閬?#8220;緙栫爜”鐨勬蹇靛氨鏄妸“瀛楃”杞寲鎴?#8220;瀛楄妭”灝卞彲浠ヤ簡銆傚浜?#8220;UNICODE 緙栫爜”錛岀敱浜庡畠浠槸鍙互閫氳繃璁$畻寰楀埌鐨勶紝鍥犳錛屽湪鐗規畩鐨勫満鍚堬紝鎴戜滑鍙互鍘諱簡瑙f煇涓縐?#8220;UNICODE 緙栫爜”鏄庢牱鐨勮鍒欍?/p>
鍦?C++ 鍜?Java 涓紝鐢ㄦ潵浠h〃“瀛楃”鍜?#8220;瀛楄妭”鐨勬暟鎹被鍨嬶紝浠ュ強榪涜緙栫爜鐨勬柟娉曪細 浠ヤ笂闇瑕佹敞鎰忓嚑鐐癸細 澹版槑涓孌靛瓧絎︿覆甯擱噺錛?/p>
UNICODE 瀛楃涓茬殑 I/O 鎿嶄綔錛屽瓧絎︿笌瀛楄妭鐨勮漿鎹㈡搷浣滐細 鍦?Visual C++ 涓紝UNICODE 瀛楃涓插父閲忔湁鏇寸畝鍗曠殑琛ㄧず鏂規硶銆傚鏋滄簮紼嬪簭鐨勭紪鐮佷笌褰撳墠榛樿 ANSI 緙栫爜涓嶇錛屽垯闇瑕佷嬌鐢?#pragma setlocale錛屽憡璇夌紪璇戝櫒婧愮▼搴忎嬌鐢ㄧ殑緙栫爜錛?/p>
浠ヤ笂闇瑕佹敞鎰?#pragma setlocale 涓?setlocale(LC_ALL, "") 鐨勪綔鐢ㄦ槸涓嶅悓鐨勶紝#pragma setlocale 鍦ㄧ紪璇戞椂璧蜂綔鐢紝setlocale() 鍦ㄨ繍琛屾椂璧蜂綔鐢ㄣ?/p>
瀛楃涓茬被 String 涓殑鍐呭鏄?UNICODE 瀛楃涓詫細 瀛楃涓?I/O 鎿嶄綔錛屽瓧絎︿笌瀛楄妭杞崲鎿嶄綔銆傚湪 Java 鍖?java.io.* 涓紝浠?#8220;Stream”緇撳熬鐨勭被涓鑸槸鐢ㄦ潵鎿嶄綔“瀛楄妭涓?#8221;鐨勭被錛屼互“Reader”錛?#8220;Writer”緇撳熬鐨勭被涓鑸槸鐢ㄦ潵鎿嶄綔“瀛楃涓?#8221;鐨勭被銆?/p>
濡傛灉 java 鐨勬簮紼嬪簭緙栫爜涓庡綋鍓嶉粯璁?ANSI 緙栫爜涓嶇錛屽垯鍦ㄧ紪璇戠殑鏃跺欙紝闇瑕佹寚鏄庝竴涓嬫簮紼嬪簭鐨勭紪鐮併傛瘮濡傦細 浠ヤ笂闇瑕佹敞鎰忓尯鍒嗘簮紼嬪簭鐨勭紪鐮佷笌 I/O 鎿嶄綔鐨勭紪鐮侊紝鍓嶈呮槸鍦ㄧ紪璇戞椂璧蜂綔鐢紝鍚庤呮槸鍦ㄨ繍琛屾椂璧蜂綔鐢ㄣ?/p>
絎竴縐嶈瑙o紝寰寰鏄鑷翠貢鐮佷駭鐢熺殑鍘熷洜銆傜浜岀璇В錛屽線寰瀵艱嚧鏈潵瀹規槗綰犳鐨勪貢鐮侀棶棰樺彉寰楁洿澶嶆潅銆?/p>
鍦ㄨ繖閲岋紝鎴戜滑鍙互鐪嬪埌錛屽叾涓墍璁茬殑“璇В涓”錛屽嵆閲囩敤姣?#8220;涓涓瓧鑺?#8221;灝辨槸“涓涓瓧絎?#8221;鐨勮漿鍖栨柟娉曪紝瀹為檯涓婁篃灝辯瓑鍚屼簬閲囩敤 iso-8859-1 榪涜杞寲銆傚洜姝わ紝鎴戜滑甯稿父浣跨敤 bytes = string.getBytes("iso-8859-1") 鏉ヨ繘琛岄嗗悜鎿嶄綔錛屽緱鍒板師濮嬬殑“瀛楄妭涓?#8221;銆傜劧鍚庡啀浣跨敤姝g‘鐨?ANSI 緙栫爜錛屾瘮濡?string = new String(bytes, "GB2312")錛屾潵寰楀埌姝g‘鐨?#8220;UNICODE 瀛楃涓?#8221;銆?/p>
闈?UNICODE 紼嬪簭涓殑瀛楃涓詫紝閮芥槸浠ユ煇縐?ANSI 緙栫爜褰㈠紡瀛樺湪鐨勩傚鏋滅▼搴忚繍琛屾椂鐨勮璦鐜涓庡紑鍙戞椂鐨勮璦鐜涓嶅悓錛屽皢浼氬鑷?ANSI 瀛楃涓茬殑鏄劇ず澶辮觸銆?/p>
姣斿錛屽湪鏃ユ枃鐜涓嬪紑鍙戠殑闈?UNICODE 鐨勬棩鏂囩▼搴忕晫闈紝鎷垮埌涓枃鐜涓嬭繍琛屾椂錛岀晫闈笂灝嗘樉紺轟貢鐮併傚鏋滆繖涓棩鏂囩▼搴忕晫闈㈡敼涓洪噰鐢?UNICODE 鏉ヨ褰曞瓧絎︿覆錛岄偅涔堝綋鍦ㄤ腑鏂囩幆澧冧笅榪愯鏃訛紝鐣岄潰涓婂皢鍙互鏄劇ず姝e父鐨勬棩鏂囥?/p>
鐢變簬瀹㈣鍘熷洜錛屾湁鏃跺欐垜浠繀欏誨湪涓枃鎿嶄綔緋葷粺涓嬭繍琛岄潪 UNICODE 鐨勬棩鏂囪蔣浠訛紝榪欐椂鎴戜滑鍙互閲囩敤涓浜涘伐鍏鳳紝姣斿錛屽崡鏋佹槦錛孉ppLocale 絳夛紝鏆傛椂鐨勬ā鎷熶笉鍚岀殑璇█鐜銆?/p>
褰撻〉闈腑鐨勮〃鍗曟彁浜ゅ瓧絎︿覆鏃訛紝棣栧厛鎶婂瓧絎︿覆鎸夌収褰撳墠欏甸潰鐨勭紪鐮侊紝杞寲鎴愬瓧鑺備覆銆傜劧鍚庡啀灝嗘瘡涓瓧鑺傝漿鍖栨垚 "%XX" 鐨勬牸寮忔彁浜ゅ埌 Web 鏈嶅姟鍣ㄣ傛瘮濡傦紝涓涓紪鐮佷負 GB2312 鐨勯〉闈紝鎻愪氦 "涓? 榪欎釜瀛楃涓叉椂錛屾彁浜ょ粰鏈嶅姟鍣ㄧ殑鍐呭涓?"%D6%D0"銆?/p>
鍦ㄦ湇鍔″櫒绔紝Web 鏈嶅姟鍣ㄦ妸鏀跺埌鐨?"%D6%D0" 杞寲鎴?[0xD6, 0xD0] 涓や釜瀛楄妭錛岀劧鍚庡啀鏍規嵁 GB2312 緙栫爜瑙勫垯寰楀埌 "涓? 瀛椼?/p>
鍦?Tomcat 鏈嶅姟鍣ㄤ腑錛宺equest.getParameter() 寰楀埌涔辯爜鏃訛紝甯稿父鏄洜涓哄墠闈㈡彁鍒扮殑“璇В涓”閫犳垚鐨勩傞粯璁ゆ儏鍐典笅錛屽綋鎻愪氦 "%D6%D0" 緇?Tomcat 鏈嶅姟鍣ㄦ椂錛宺equest.getParameter() 灝嗚繑鍥?[0x00D6, 0x00D0] 涓や釜 UNICODE 瀛楃錛岃屼笉鏄繑鍥炰竴涓?"涓? 瀛楃銆傚洜姝わ紝鎴戜滑闇瑕佷嬌鐢?bytes = string.getBytes("iso-8859-1") 寰楀埌鍘熷鐨勫瓧鑺備覆錛屽啀鐢?string = new String(bytes, "GB2312") 閲嶆柊寰楀埌姝g‘鐨勫瓧絎︿覆 "涓?銆?/p>
閫氳繃鏁版嵁搴撳鎴風錛堟瘮濡?ODBC 鎴?JDBC錛変粠鏁版嵁搴撴湇鍔″櫒涓鍙栧瓧絎︿覆鏃訛紝瀹㈡埛绔渶瑕佷粠鏈嶅姟鍣ㄨ幏鐭ユ墍浣跨敤鐨?ANSI 緙栫爜銆傚綋鏁版嵁搴撴湇鍔″櫒鍙戦佸瓧鑺傛祦緇欏鎴風鏃訛紝瀹㈡埛绔礋璐e皢瀛楄妭嫻佹寜鐓ф紜殑緙栫爜杞寲鎴?UNICODE 瀛楃涓層?/p>
濡傛灉浠庢暟鎹簱璇誨彇瀛楃涓叉椂寰楀埌涔辯爜錛岃屾暟鎹簱涓瓨鏀劇殑鏁版嵁鍙堟槸姝g‘鐨勶紝閭d箞寰寰榪樻槸鍥犱負鍓嶉潰鎻愬埌鐨?#8220;璇В涓”閫犳垚鐨勩傝В鍐崇殑鍔炴硶榪樻槸閫氳繃 string = new String( string.getBytes("iso-8859-1"), "GB2312") 鐨勬柟娉曪紝閲嶆柊寰楀埌鍘熷鐨勫瓧鑺備覆錛屽啀閲嶆柊浣跨敤姝g‘鐨勭紪鐮佽漿鍖栨垚瀛楃涓層?/p>
褰撲竴孌?Text 鎴栬?HTML 閫氳繃鐢靛瓙閭歡浼犻佹椂錛屽彂閫佺殑鍐呭棣栧厛閫氳繃涓縐嶆寚瀹氱殑瀛楃緙栫爜杞寲鎴?#8220;瀛楄妭涓?#8221;錛岀劧鍚庡啀鎶?#8220;瀛楄妭涓?#8221;閫氳繃涓縐嶆寚瀹氱殑浼犺緭緙栫爜錛圕ontent-Transfer-Encoding錛夎繘琛岃漿鍖栧緱鍒板彟涓涓?#8220;瀛楄妭涓?#8221;銆傛瘮濡傦紝鎵撳紑涓灝佺數瀛愰偖浠舵簮浠g爜錛屽彲浠ョ湅鍒扮被浼肩殑鍐呭錛?/p>
鏈甯哥敤鐨?Content-Transfer-Encoding 鏈?Base64 鍜?Quoted-Printable 涓ょ銆傚湪瀵逛簩榪涘埗鏂囦歡鎴栬呬腑鏂囨枃鏈繘琛岃漿鍖栨椂錛孊ase64 寰楀埌鐨?#8220;瀛楄妭涓?#8221;姣?Quoted-Printable 鏇寸煭銆傚湪瀵硅嫳鏂囨枃鏈繘琛岃漿鍖栨椂錛孮uoted-Printable 寰楀埌鐨?#8220;瀛楄妭涓?#8221;姣?Base64 鏇寸煭銆?/p>
閭歡鐨勬爣棰橈紝鐢ㄤ簡涓縐嶆洿綆鐭殑鏍煎紡鏉ユ爣娉?#8220;瀛楃緙栫爜”鍜?#8220;浼犺緭緙栫爜”銆傛瘮濡傦紝鏍囬鍐呭涓?"涓?錛屽垯鍦ㄩ偖浠舵簮浠g爜涓〃紺轟負錛?/p>
鍏朵腑錛?/p>
濡傛灉“浼犺緭緙栫爜”鏀逛負 Quoted-Printable錛屽悓鏍鳳紝濡傛灉鏍囬鍐呭涓?"涓?錛?/p>
濡傛灉闃呰閭歡鏃跺嚭鐜頒貢鐮侊紝涓鑸槸鍥犱負“瀛楃緙栫爜”鎴?#8220;浼犺緭緙栫爜”鎸囧畾鏈夎錛屾垨鑰呮槸娌℃湁鎸囧畾銆傛瘮濡傦紝鏈夌殑鍙戦偖浠剁粍浠跺湪鍙戦侀偖浠舵椂錛屾爣棰?"涓?錛?/p>
榪欐牱鐨勮〃紺猴紝瀹為檯涓婃槸鏄庣‘鎸囨槑浜嗘爣棰樹負 [0x00D6, 0x00D0]錛屽嵆 "ÖÐ"錛岃屼笉鏄?"涓?銆?/p>
闈炰篃銆俰so-8859-1 鍙槸鍗曞瓧鑺傚瓧絎﹂泦涓渶綆鍗曠殑涓縐嶏紝涔熷氨鏄?#8220;瀛楄妭緙栧彿”涓?#8220;UNICODE 瀛楃緙栧彿”涓鑷寸殑閭g緙栫爜瑙勫垯銆傚綋鎴戜滑瑕佹妸涓涓?#8220;瀛楄妭涓?#8221;杞寲鎴?#8220;瀛楃涓?#8221;錛岃屽張涓嶇煡閬撳畠鏄摢涓縐?ANSI 緙栫爜鏃訛紝鍏堟殏鏃跺湴鎶?#8220;姣忎竴涓瓧鑺?#8221;浣滀負“涓涓瓧絎?#8221;榪涜杞寲錛屼笉浼氶犳垚淇℃伅涓㈠け銆傜劧鍚庡啀浣跨敤 bytes = string.getBytes("iso-8859-1") 鐨勬柟娉曞彲鎭㈠鍒板師濮嬬殑瀛楄妭涓層?/p>
Java 涓紝瀛楃涓茬被 java.lang.String 澶勭悊鐨勬槸 UNICODE 瀛楃涓詫紝涓嶆槸 ANSI 瀛楃涓層傛垜浠彧闇瑕佹妸瀛楃涓蹭綔涓?#8220;鎶借薄鐨勭鍙風殑涓?#8221;鏉ョ湅寰呫傚洜姝や笉瀛樺湪瀛楃涓茬殑鍐呯爜鐨勯棶棰樸?/p>
![]()
![]()
1.3 瀛楃闆嗕笌緙栫爜
![]()
![]()
1.4 甯哥敤鐨勭紪鐮佺畝浠?/h5>
鍒嗙被
緙栫爜鏍囧噯
璇存槑
鍗曞瓧鑺傚瓧絎︾紪鐮?/td>
ISO-8859-1
鏈綆鍗曠殑緙栫爜瑙勫垯錛屾瘡涓涓瓧鑺傜洿鎺ヤ綔涓轟竴涓?UNICODE 瀛楃銆傛瘮濡傦紝[0xD6, 0xD0] 榪欎袱涓瓧鑺傦紝閫氳繃 iso-8859-1 杞寲涓哄瓧絎︿覆鏃訛紝灝嗙洿鎺ュ緱鍒?[0x00D6, 0x00D0] 涓や釜 UNICODE 瀛楃錛屽嵆 "ÖÐ"銆?br>
鍙嶄箣錛屽皢 UNICODE 瀛楃涓查氳繃 iso-8859-1 杞寲涓哄瓧鑺備覆鏃訛紝鍙兘姝e父杞寲 0~255 鑼冨洿鐨勫瓧絎︺?/td>
ANSI 緙栫爜
GB2312,
BIG5,
Shift_JIS,
ISO-8859-2 ……鎶?UNICODE 瀛楃涓查氳繃 ANSI 緙栫爜杞寲涓?#8220;瀛楄妭涓?#8221;鏃訛紝鏍規嵁鍚勮嚜緙栫爜鐨勮瀹氾紝涓涓?UNICODE 瀛楃鍙兘杞寲鎴愪竴涓瓧鑺傛垨澶氫釜瀛楄妭銆?br>
鍙嶄箣錛屽皢瀛楄妭涓茶漿鍖栨垚瀛楃涓叉椂錛屼篃鍙兘澶氫釜瀛楄妭杞寲鎴愪竴涓瓧絎︺傛瘮濡傦紝[0xD6, 0xD0] 榪欎袱涓瓧鑺傦紝閫氳繃 GB2312 杞寲涓哄瓧絎︿覆鏃訛紝灝嗗緱鍒?[0x4E2D] 涓涓瓧絎︼紝鍗?'涓? 瀛椼?br>
“ANSI 緙栫爜”鐨勭壒鐐癸細
1. 榪欎簺“ANSI 緙栫爜鏍囧噯”閮藉彧鑳藉鐞嗗悇鑷璦鑼冨洿涔嬪唴鐨?UNICODE 瀛楃銆?br>2. “UNICODE 瀛楃”涓?#8220;杞崲鍑烘潵鐨勫瓧鑺?#8221;涔嬮棿鐨勫叧緋繪槸浜轟負瑙勫畾鐨勩?/td>
UNICODE 緙栫爜
UTF-8,
UTF-16, UnicodeBig ……涓?#8220;ANSI 緙栫爜”綾諱技鐨勶紝鎶婂瓧絎︿覆閫氳繃 UNICODE 緙栫爜杞寲鎴?#8220;瀛楄妭涓?#8221;鏃訛紝涓涓?UNICODE 瀛楃鍙兘杞寲鎴愪竴涓瓧鑺傛垨澶氫釜瀛楄妭銆?br>
涓?#8220;ANSI 緙栫爜”涓嶅悓鐨勬槸錛?br>1. 榪欎簺“UNICODE 緙栫爜”鑳藉澶勭悊鎵鏈夌殑 UNICODE 瀛楃銆?br>2. “UNICODE 瀛楃”涓?#8220;杞崲鍑烘潵鐨勫瓧鑺?#8221;涔嬮棿鏄彲浠ラ氳繃璁$畻寰楀埌鐨勩?/td>
![]()
![]()
2. 瀛楃涓庣紪鐮佸湪紼嬪簭涓殑瀹炵幇
2.1 紼嬪簭涓殑瀛楃涓庡瓧鑺?/h5>
綾誨瀷鎴栨搷浣?/strong>
C++
Java
瀛楃
wchar_t
char
瀛楄妭
char
byte
ANSI 瀛楃涓?/td>
char[]
byte[]
UNICODE 瀛楃涓?/td>
wchar_t[]
String
瀛楄妭涓?#8594;瀛楃涓?/td>
mbstowcs(), MultiByteToWideChar()
string = new String(bytes, "encoding")
瀛楃涓?#8594;瀛楄妭涓?/td>
wcstombs(), WideCharToMultiByte()
bytes = string.getBytes("encoding")
![]()
![]()
2.2 C++ 涓浉鍏沖疄鐜版柟娉?/h5>
// ANSI 瀛楃涓詫紝鍐呭闀垮害 7 瀛楄妭
char sz[20] = "涓枃123";
// UNICODE 瀛楃涓詫紝鍐呭闀垮害 5 涓?wchar_t錛?0 瀛楄妭錛?/span>
wchar_t wsz[20] = L"\x4E2D\x6587\x0031\x0032\x0033";
// 榪愯鏃惰瀹氬綋鍓?ANSI 緙栫爜錛孷C 鏍煎紡
setlocale(LC_ALL, ".936");
// GCC 涓牸寮?/span>
setlocale(LC_ALL, "zh_CN.GBK");
// Visual C++ 涓嬌鐢ㄥ皬鍐?%s錛屾寜鐓?setlocale 鎸囧畾緙栫爜杈撳嚭鍒版枃浠?br>// GCC 涓嬌鐢ㄥぇ鍐?%S
fwprintf(fp, L"%s\n", wsz);
// 鎶?UNICODE 瀛楃涓叉寜鐓?setlocale 鎸囧畾鐨勭紪鐮佽漿鎹㈡垚瀛楄妭
wcstombs(sz, wsz, 20);
// 鎶婂瓧鑺備覆鎸夌収 setlocale 鎸囧畾鐨勭紪鐮佽漿鎹㈡垚 UNICODE 瀛楃涓?br>mbstowcs(wsz, sz, 20);
// 濡傛灉婧愮▼搴忕殑緙栫爜涓庡綋鍓嶉粯璁?ANSI 緙栫爜涓嶄竴鑷達紝
// 鍒欓渶瑕佹琛岋紝緙栬瘧鏃剁敤鏉ユ寚鏄庡綋鍓嶆簮紼嬪簭浣跨敤鐨勭紪鐮?/font>
#pragma setlocale(".936")
// UNICODE 瀛楃涓插父閲忥紝鍐呭闀垮害 10 瀛楄妭
wchar_t wsz[20] = L"涓枃123";
![]()
![]()
2.3 Java 涓浉鍏沖疄鐜版柟娉?/h5>
// Java 浠g爜錛岀洿鎺ュ啓涓枃
String string = "涓枃123";
// 寰楀埌闀垮害涓?5錛屽洜涓烘槸 5 涓瓧絎?/span>
System.out.println(string.length());
// 瀛楃涓蹭笌瀛楄妭涓查棿鐩鎬簰杞寲
// 鎸夌収 GB2312 寰楀埌瀛楄妭錛堝緱鍒板瀛楄妭瀛楃涓詫級
byte [] bytes = string.getBytes("GB2312");
// 浠庡瓧鑺傛寜鐓?GB2312 寰楀埌 UNICODE 瀛楃涓?/span>
string = new String(bytes, "GB2312");
// 瑕佸皢 String 鎸夌収鏌愮緙栫爜鍐欏叆鏂囨湰鏂囦歡錛屾湁涓ょ鏂規硶錛?br>
// 絎竴縐嶅姙娉曪細鐢?Stream 綾誨啓鍏ュ凡緇忔寜鐓ф寚瀹氱紪鐮佽漿鍖栧ソ鐨勫瓧鑺備覆
OutputStream os = new FileOutputStream("1.txt");
os.write(bytes);
os.close();
// 絎簩縐嶅姙娉曪細鏋勯犳寚瀹氱紪鐮佺殑 Writer 鏉ュ啓鍏ュ瓧絎︿覆
Writer ow = new OutputStreamWriter(new FileOutputStream("2.txt"), "GB2312");
ow.write(string);
ow.close();
/* 鏈鍚庡緱鍒扮殑 1.txt 鍜?2.txt 閮芥槸 7 涓瓧鑺?*/
E:\>javac -encoding BIG5 Hello.java
![]()
![]()
3. 鍑犵璇В錛屼互鍙婁貢鐮佷駭鐢熺殑鍘熷洜鍜岃В鍐沖姙娉?/h4>
3.1 瀹規槗浜х敓鐨勮瑙?/h5>
銆
瀵圭紪鐮佺殑璇В
璇В涓
鍦ㄥ皢“瀛楄妭涓?#8221;杞寲鎴?#8220;UNICODE 瀛楃涓?#8221;鏃訛紝姣斿鍦ㄨ鍙栨枃鏈枃浠舵椂錛屾垨鑰呴氳繃緗戠粶浼犺緭鏂囨湰鏃訛紝瀹規槗灝?#8220;瀛楄妭涓?#8221;綆鍗曞湴浣滀負鍗曞瓧鑺傚瓧絎︿覆錛岄噰鐢ㄦ瘡“涓涓瓧鑺?#8221;灝辨槸“涓涓瓧絎?#8221;鐨勬柟娉曡繘琛岃漿鍖栥?br>
鑰屽疄闄呬笂錛屽湪闈炶嫳鏂囩殑鐜涓紝搴旇灝?#8220;瀛楄妭涓?#8221;浣滀負 ANSI 瀛楃涓詫紝閲囩敤閫傚綋鐨勭紪鐮佹潵寰楀埌 UNICODE 瀛楃涓詫紝鏈夊彲鑳?#8220;澶氫釜瀛楄妭”鎵嶈兘寰楀埌“涓涓瓧絎?#8221;銆?br>
閫氬父錛屼竴鐩村湪鑻辨枃鐜涓嬪仛寮鍙戠殑紼嬪簭鍛樹滑錛屽鏄撴湁榪欑璇В銆?/td>
璇В浜?/td>
鍦?DOS錛學indows 98 絳夐潪 UNICODE 鐜涓嬶紝瀛楃涓查兘鏄互 ANSI 緙栫爜鐨勫瓧鑺傚艦寮忓瓨鍦ㄧ殑銆傝繖縐嶄互瀛楄妭褰㈠紡瀛樺湪鐨勫瓧絎︿覆錛屽繀欏葷煡閬撴槸鍝緙栫爜鎵嶈兘琚紜湴浣跨敤銆傝繖浣挎垜浠艦鎴愪簡涓涓儻鎬ф濈淮錛?#8220;瀛楃涓茬殑緙栫爜”銆?br>
褰?UNICODE 琚敮鎸佸悗錛孞ava 涓殑 String 鏄互瀛楃鐨?#8220;搴忓彿”鏉ュ瓨鍌ㄧ殑錛屼笉鏄互“鏌愮緙栫爜鐨勫瓧鑺?#8221;鏉ュ瓨鍌ㄧ殑錛屽洜姝ゅ凡緇忎笉瀛樺湪“瀛楃涓茬殑緙栫爜”榪欎釜姒傚康浜嗐傚彧鏈夊湪“瀛楃涓?#8221;涓?#8220;瀛楄妭涓?#8221;杞寲鏃訛紝鎴栬咃紝灝嗕竴涓?#8220;瀛楄妭涓?#8221;褰撴垚涓涓?ANSI 瀛楃涓叉椂錛屾墠鏈夌紪鐮佺殑姒傚康銆?br>
涓嶅皯鐨勪漢閮芥湁榪欎釜璇В銆?/td>
![]()
![]()
3.2 闈?UNICODE 紼嬪簭鍦ㄤ笉鍚岃璦鐜闂寸Щ妞嶆椂鐨勪貢鐮?/h5>
![]()
![]()
3.3 緗戦〉鎻愪氦瀛楃涓?/h5>
![]()
![]()
3.4 浠庢暟鎹簱璇誨彇瀛楃涓?/h5>
![]()
![]()
3.5 鐢靛瓙閭歡涓殑瀛楃涓?/h5>
Content-Type: text/plain;
charset="gb2312"
Content-Transfer-Encoding: base64
sbG+qcrQuqO17cf4yee74bGjz9W7+b3wudzA7dbQ0MQNCg0KvPKzxqO6uqO17cnnsaPW0NDEDQoNCg==
// 姝g‘鐨勬爣棰樻牸寮?/span>
Subject: =?GB2312?B?1tA=?=
// 姝g‘鐨勬爣棰樻牸寮?/span>
Subject: =?GB2312?Q?=D6=D0?=
// 閿欒鐨勬爣棰樻牸寮?/span>
Subject: =?ISO-8859-1?Q?=D6=D0?=
![]()
![]()
4. 鍑犵閿欒鐞嗚В鐨勭籂姝?/h4>
璇В錛?#8220;ISO-8859-1 鏄浗闄呯紪鐮侊紵”
璇В錛?#8220;Java 涓紝鎬庢牱鐭ラ亾鏌愪釜瀛楃涓茬殑鍐呯爜錛?#8221;
娉細涓婇潰鐨勪笁涓瓧鑺備綅鍘熸枃錛屼笅闈㈠洓涓瓧鑺備負Base64緙栫爜錛屽叾鍓嶄袱浣嶅潎涓?銆?
榪欐牱鎷嗗垎鐨勬椂鍊欙紝鍘熸枃鐨勫瓧鑺傛暟閲忓簲璇ユ槸3鐨勫嶆暟錛屽綋榪欎釜鏉′歡涓嶈兘婊¤凍鏃訛紝鐢ㄥ叏闆跺瓧鑺?
琛ヨ凍錛岃漿鍖栨椂Base64緙栫爜鐢?鍙蜂唬鏇匡紝榪欏氨鏄負浠涔堟湁浜汢ase64緙栫爜浠ヤ竴涓垨涓や釜絳夊彿緇?
鏉熺殑鍘熷洜錛屼絾絳夊彿鏈澶氭湁涓や釜錛屽洜涓猴細濡傛灉F(origin)浠h〃鍘熸枃鐨勫瓧鑺傛暟錛孎(remain)浠?
琛ㄤ綑鏁幫紝鍒?
F(remain) = F(origin) MOD 3 鎴愮珛銆?
鎵浠(remain)鐨勫彲鑳藉彇鍊間負0,1,2.
濡傛灉璁?n = [F(origin) – F(remain)] / 3
褰揊(remain) = 0 鏃訛紝鎭板ソ杞崲涓?*n涓瓧鑺傜殑Base64緙栫爜銆?
褰揊(remain) = 1 鏃訛紝鐢變簬涓涓師鏂囧瓧鑺傚彲浠ユ媶鍒嗕負灞炰簬涓や釜Base64緙栫爜鐨勫瓧鑺傦紝涓轟簡
璁〣ase64緙栫爜鏄?鐨勫嶆暟錛屾墍浠ュ簲璇ヤ負琛?涓瓑鍙楓?
褰揊(remain) = 2 鏃訛紝鐢變簬涓や釜鍘熸枃瀛楄妭鍙互鎷嗗垎涓哄睘浜?涓狟ase64緙栫爜鐨勫瓧鑺傦紝鍚岀悊錛?
搴旇琛ヤ笂涓涓瓑鍙?nbsp;