문자열 인코딩 체크함수

2009. 5. 21. 11:30

int CheckEncodingType(CString strPage)
{
//////////////////////////////////////////////////////////////////////////
//
// Function : CheckEncodingType
// Param : CString strPage  text whose leading bytes / byte pattern is inspected
// Return : integer;    0  = ANSI
//                      1  = UTF-8
//                      2  = Unicode LE
//                      3  = Unicode BE
//                      -1 = input shorter than 3 characters
//
// description : Guesses the encoding of the text. A byte order mark (BOM)
//               at the start decides immediately; otherwise the bytes are
//               scanned for UTF-8 multi-byte sequences.
//
// NOTE(review): every character of strPage is narrowed to one byte, so this
//               presumably expects an ANSI/MBCS build in which the CString
//               carries the raw bytes of the page - confirm against callers.

    // At least three characters are required (the UTF-8 BOM is 3 bytes long).
    const int nLength = strPage.GetLength();
    if (nLength < 3)
        return -1;

    // BOM check on the actual leading bytes of the input.
    const unsigned char btFirst  = static_cast<unsigned char>(strPage.GetAt(0));
    const unsigned char btSecond = static_cast<unsigned char>(strPage.GetAt(1));
    const unsigned char btThird  = static_cast<unsigned char>(strPage.GetAt(2));

    if (btFirst == 0xFF && btSecond == 0xFE)                    // Unicode little endian
        return 2;
    if (btFirst == 0xFE && btSecond == 0xFF)                    // Unicode big endian
        return 3;
    if (btFirst == 0xEF && btSecond == 0xBB && btThird == 0xBF) // UTF-8
        return 1;

    // No BOM: tell ANSI and UTF-8 apart by brute-force inspection of every byte.
    int nLookNum = 0;   // continuation bytes still expected for the current sequence
    int nRead = 0;      // total number of bytes examined
    int nANSINum = 0;   // bytes seen outside a sequence that are not UTF-8 lead bytes

    for (int i = 0; i < nLength; i++)
    {
        const unsigned char btCur = static_cast<unsigned char>(strPage.GetAt(i));
        nRead++;

        if (nLookNum > 0)   // inside a multi-byte sequence
        {
            nLookNum--;
            if (btCur >= 0x80 && btCur <= 0xBF)
                continue;
            return 0;       // broken continuation byte -> not UTF-8, report ANSI
        }

        // Outermost level: a lead byte announces how many continuation bytes follow.
        if (btCur >= 0xC0 && btCur <= 0xDF)      nLookNum = 1;
        else if (btCur >= 0xE0 && btCur <= 0xEF) nLookNum = 2;
        else if (btCur >= 0xF0 && btCur <= 0xF7) nLookNum = 3;
        else if (btCur >= 0xF8 && btCur <= 0xFB) nLookNum = 4;
        else if (btCur == 0xFC || btCur == 0xFD) nLookNum = 5;
        else                                     nANSINum++;
    }

    // If every byte was a plain single byte, no multi-byte sequence was seen
    // and the text is reported as ANSI; otherwise it is reported as UTF-8.
    // A sequence cut off by the end of the input still counts as UTF-8 here.
    return (nRead != nANSINum) ? 1 : 0;
}

출처 : 데브피아
http://www.devpia.com/MAEUL/Contents/Detail.aspx?BoardID=50&MAEULNo=20&no=568605&ref=561246

저작자표시 (새창열림)

public void

문자열 인코딩 체크함수

티스토리툴바