支持UNICODE读写的MFC扩展类CStdioFileEx_StdioFileEx.cpp资源-CSDN文库

共5个文件

h：3个

cpp：2个

UNICODE

CStdioFile

5星 · 超过95%的资源需积分: 49 148 浏览量 2009-04-02 15:29:45 上传评论 1 收藏 18KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

StdioFileEx_Source_Code.zip （5个子文件）

StdioFileEx.h 17KB

ggets.cpp 6KB

TemplateSmartPtr.h 3KB

ggets.h 2KB

StdioFileEx.cpp 41KB

// StdioFileEx.cpp: implementation of the CStdioFileEx class.
//
// Version 1.1 23 August 2003.   Incorporated fixes from Dennis Jeryd.
// Version 1.3 19 February 2005. Incorporated fixes from Howard J Oh and some of my own.
// Version 1.4 26 February 2005. Fixed stupid screw-up in code from 1.3.
// Version 1.5 18 November 2005. - Incorporated fixes from Andy Goodwin.
//                               - Allows code page to be specified for reading/writing
//                               - Properly calculates multibyte buffer size instead of
//                                 assuming lstrlen(s).
//                               - Should handle UTF8 properly.
// Version 1.6 19 July 2007.     - ReadString incorrectly removed \r or \n characters
//                                 immediately preceding line breaks.
//                               - Fixed tab problem in these comments! (Perry).
//                               - Made GetMultiByteStringFromUnicodeString input string const
//                                 (Perry).
//                               - Avoided double conversion if code page not set.
//                                 (Konrad Windszus).
//                               - Fixed ASSERT in GetUnicodeStringFromMultiByteString
//                                 (Konrad Windszus).
//                               - Maximum line length restriction removed. Lines of any length
//                                 can now be read thanks to C.B. Falconer's fggets (fgoodgets),
//                                 ably assisted by Ana Sayfa and Dave Kondrad.
//                               - Substantial code reorganisation and tidying.
//                               - Use of strlen/lstrlen eliminated. Conversion functions always used
//                                 to calculate required buffers.
//                               - Serious, systematic tests are now included with the code.
//                               - Options included to switch off BOM writing and alter the Unicode
//                                 filler char.
//                               - BOM is only stripped off if actually there.
//                               - UTF-8 BOM is now read and written. UTF-8 conversion works.
//
// Copyright David Pritchard 2003-2007. davidpritchard@ctv.es
//
// You can use this class freely, but please keep my ego happy
// by leaving this comment in place.
// ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "StdioFileEx.h" #include "ggets.h" #ifdef _DEBUG #undef THIS_FILE static char THIS_FILE[]=__FILE__; #define new DEBUG_NEW #endif const unsigned char UTF8_BOM[] = { unsigned char(0xEF), unsigned char(0xBB), unsigned char(0xBF) }; ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// // Add this flag to write in Unicode. For the moment, out of range of all the Visual Studio 2005 flags /*static*/ const UINT CStdioFileEx::modeWriteUnicode = 0x200000; CStdioFileEx::CStdioFileEx(): m_bCheckFilePos(true), m_bIsUnicodeText(false), m_nFileCodePage(-1), m_cUnicodeFillerChar(sDEFAULT_UNICODE_FILLER_CHAR), m_bWriteBOM(true), // By default, write the BOM CStdioFile() { } CStdioFileEx::CStdioFileEx(LPCTSTR lpszFileName,UINT nOpenFlags): m_bCheckFilePos(true), m_bIsUnicodeText(false), m_nFileCodePage(-1), m_cUnicodeFillerChar(sDEFAULT_UNICODE_FILLER_CHAR), m_bWriteBOM(true), // By default, write the BOM CStdioFile(lpszFileName, nOpenFlags) { } // Set the code page for reading/writing void CStdioFileEx::SetCodePage(IN const UINT nCodePage) { m_nFileCodePage = (int)nCodePage; } // Set the Unicode filler char - the char written when no conversion is possible for the target multibyte char set void CStdioFileEx::SetFillerChar(IN const char cFiller) { m_cUnicodeFillerChar = cFiller; } // Determines whether the byte-order-mark is written at the start of a Unicode file void CStdioFileEx::SetWriteBOM(IN const bool bWrite) { m_bWriteBOM = bWrite; } // Determines whether we try to interpret this file as Unicode //void CStdioFileEx::SetUnicode(IN const bool bIsUnicode) //{ // m_bIsUnicodeText = bIsUnicode; //} BOOL CStdioFileEx::Open(LPCTSTR lpszFileName,UINT nOpenFlags,CFileException* pError /*=NULL*/) { // Process any Unicode stuff. This no longer checks for the Unicode BOM. 
We do this on // opening for efficiency. ProcessFlags(nOpenFlags); BOOL bOK = CStdioFile::Open(lpszFileName, nOpenFlags, pError); if (bOK) { // If we are reading, see if it has a BOM. I tried making the Unicode-ness independent of the BOM (i.e. allowed the file to // be identified as Unicode by the caller, with the BOM just being used as a check, or thrown away). // But for some reason it wouldn't work. I'll no doubt try again at some point. // if (!(nOpenFlags & CFile::modeCreate) && (nOpenFlags & CFile::modeRead || nOpenFlags & CFile::modeReadWrite)) if (!(nOpenFlags & CFile::modeCreate) && !(nOpenFlags & CFile::modeWrite )) { wchar_t cBOMTest; wchar_t cBOM = nUNICODE_BOM; Read(&cBOMTest, sizeof(wchar_t)); // If the first characters are NOT a BOM, reset to start of file m_bIsUnicodeText = (wmemcmp(&cBOMTest, &cBOM, 1) == 0); // Reset to start of file SeekToBegin(); m_bCheckFilePos = true; } } return bOK; } BOOL CStdioFileEx::ReadString(CString& rString) { ASSERT(m_pStream != NULL); BOOL bReadData = FALSE; LPTSTR lpsz; int nLen = 0; // If at position 0, discard byte-order mark before reading. 
To optimise reading, we only // check this when the m_bCheckFilePos is set (this avoids a call to ftell every time we // read a line) if (m_bCheckFilePos && GetPosition() == 0) { // m_bReadBOM = false; // Look for Unicode BOM if (m_bIsUnicodeText) { wchar_t cBOMDummy; // wchar_t cBOM = nUNICODE_BOM; Read(&cBOMDummy, sizeof(wchar_t)); // // If the first characters are NOT a BOM, reset to start of file // if (wmemcmp(&cBOMTest, &cBOM, 1) != 0) // { // SeekToBegin(); // ASSERT(GetPosition() == 0); // } // else // { // // Set read BOM flag // m_bReadBOM = true; // } } // Look for UTF8 BOM else if (CP_UTF8 == m_nFileCodePage) { BYTE arrUTF8BOMTest[sizeof(UTF8_BOM)]; Read(arrUTF8BOMTest, sizeof(arrUTF8BOMTest)); // // If the first characters are NOT a BOM, reset to start of file // if (memcmp(&arrUTF8BOMTest, UTF8_BOM, sizeof(arrUTF8BOMTest)) != 0) // { // SeekToBegin(); // ASSERT(GetPosition() == 0); // } // else // { // // Set read BOM flag // m_bReadBOM = true; // } } } // Read Unicode line or multibyte line (implementations // differ depending on the compilation) if (m_bIsUnicodeText) { bReadData = ReadUnicodeLine(rString); } else { bReadData = ReadMultiByteLine(rString); } // Then remove end-of-line character as necessary. // fggets keeps the end-of-line confusion level at maximum by stripping the \n // from the end of lines, but leaving the \r. Grrrr. // Remember that you could quite legitimately have a \r or \n at the end of // your line before the actual \r\n line break. if (bReadData) { // Copied from FileTxt.cpp but adapted to use of fgets nLen = rString.GetLength(); lpsz = rString.GetBuffer(0); // Strip \r from the end if (nLen != 0 && (lpsz[nLen-1] == _T('\r') )) { rString.GetBufferSetLength(nLen-1); } rString.ReleaseBuffer(); // Now we've moved on in the file, don't bother to check any more unless the // file pointer is moved m_bCheckFilePos = false; } return bReadData; } /*virtual*/ LPTSTR CStdioFileEx::ReadString(LPTSTR lpsz,UINT nMax) { // Ca

评论收藏

内容反馈