/*
www.sourceforge.net/projects/tinyxml
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
//#include "stdafx.h"
#include "tinyxml.h"
#include <ctype.h>
#include <stddef.h>
//#define DEBUG_PARSER
// Note that "PutString" hardcodes the same list, so this table
// is less flexible than it appears. Changing the entries
// or their order will break PutString.
// Table of the predefined XML entities.
// Each row holds: the entity text as it appears in a document, the length
// of that text in characters, and the single character it stands for.
// The length column must always equal the length of the text in the first
// column (e.g. "&amp;" is 5 characters long).
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
    { "&amp;",  5, '&' },
    { "&lt;",   4, '<' },
    { "&gt;",   4, '>' },
    { "&quot;", 6, '\"' },
    { "&apos;", 6, '\'' }
};
// Bunch of unicode info at:
// http://www.unicode.org/faq/utf_bom.html
// Including the basis of this table, which determines the number of bytes
// in the sequence from the lead byte. 1 is placed for invalid sequences --
// although the result will be junk, pass it through as much as possible.
// Beware of the non-characters in UTF-8:
// ef bb bf (Microsoft "lead bytes")
// ef bf be
// ef bf bf
// The three bytes of the "ef bb bf" lead sequence, in order of appearance.
// The first byte is also the lead byte of the "ef bf be" and "ef bf bf"
// sequences, so it is the value to switch on when looking for any of them.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
// Lookup table indexed by the first byte of a UTF-8 sequence; the value is
// the number of bytes in that sequence (1 to 4). Bytes that cannot start a
// sequence (0x80-0xc1 and 0xf5-0xff) are given length 1 so that a scanner
// stepping by the table value still moves forward over malformed input.
// Each row below covers 16 consecutive byte values; the trailing comment on
// a row gives the value of its first entry.
const int TiXmlBase::utf8ByteTable[256] =
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
// Encodes a single UTF-32 code point as UTF-8.
//
//   input   the code point to encode.
//   output  destination buffer; receives *length bytes (at most 4). No
//           terminating null is written.
//   length  receives the number of bytes written, or 0 when the code point
//           cannot be encoded, in which case output is left untouched.
//
// Code points above U+10FFFF are rejected (length 0): UTF-8 is not defined
// beyond that value, and encoding them would produce 4-byte sequences that
// are not valid UTF-8.
// NOTE(review): surrogate values (0xD800-0xDFFF) are still encoded as
// ordinary 3-byte sequences, as before -- confirm whether callers want
// those rejected as well.
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
    const unsigned long BYTE_MASK = 0xBF;
    const unsigned long BYTE_MARK = 0x80;
    const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
    const unsigned long MAX_CODE_POINT = 0x10FFFF;

    if ( input < 0x80 )
        *length = 1;
    else if ( input < 0x800 )
        *length = 2;
    else if ( input < 0x10000 )
        *length = 3;
    else if ( input <= MAX_CODE_POINT )
        *length = 4;
    else
    {
        // Not representable in UTF-8.
        *length = 0;
        return;
    }

    // Fill the buffer back to front: every continuation byte carries the
    // low six bits of what is left of the code point, tagged as 10xxxxxx.
    char* cursor = output + *length;
    for ( int remaining = *length - 1; remaining > 0; --remaining )
    {
        --cursor;
        *cursor = static_cast<char>( (input | BYTE_MARK) & BYTE_MASK );
        input >>= 6;
    }

    // Whatever bits remain go into the lead byte together with the marker
    // that announces the sequence length.
    --cursor;
    *cursor = static_cast<char>( input | FIRST_BYTE_MARK[*length] );
}
// Reports whether a byte may be treated as a letter.
//
//   anyByte   the byte to classify.
//   encoding  unused; the same rule is applied for every encoding.
//
// Returns non-zero for a letter, zero otherwise.
//
// Only the ASCII range (0-127) is really classified, via isalpha(). Every
// byte from 128 up is accepted as a letter: telling alphabetic from
// non-alphabetic across encodings is tricky, so the conservative choice is
// to get the English letters right and let everything else through.
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
    // 127 (DEL) is still ASCII, so it must be classified rather than
    // falling into the "always a letter" branch.
    if ( anyByte < 128 )
        return isalpha( anyByte );

    return 1;   // What else to do? The unicode set is huge...get the english ones right.
}
// Reports whether a byte may be treated as a letter or a digit.
//
//   anyByte   the byte to classify.
//   encoding  unused; the same rule is applied for every encoding.
//
// Returns non-zero for a letter or digit, zero otherwise.
//
// Only the ASCII range (0-127) is really classified, via isalnum(). Every
// byte from 128 up is accepted: telling alphabetic from non-alphabetic
// across encodings is tricky, so the conservative choice is to get the
// English characters right and let everything else through.
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
    // 127 (DEL) is still ASCII, so it must be classified rather than
    // falling into the "always accepted" branch.
    if ( anyByte < 128 )
        return isalnum( anyByte );

    return 1;   // What else to do? The unicode set is huge...get the english ones right.
}
// Bookkeeping used while parsing to keep track of a row/column position.
// Instances can only be created by TiXmlDocument (the constructor is
// private and the document is a friend).
class TiXmlParsingData
{
    friend class TiXmlDocument;

public:
    // Brings the stored cursor forward to the position 'now'.
    void Stamp( const char* now, TiXmlEncoding encoding );

    // Read-only access to the current row/column.
    const TiXmlCursor& Cursor() { return cursor; }

private:
    // Only used by the document!
    //   start     text position the tracking begins at; must not be null.
    //   _tabsize  tab width used for column computation.
    //   row, col  cursor position that corresponds to 'start'.
    TiXmlParsingData( const char* start, int _tabsize, int row, int col )
        : stamp( start ),
          tabsize( _tabsize )
    {
        assert( start );
        cursor.row = row;
        cursor.col = col;
    }

    TiXmlCursor cursor;     // current row/column
    const char* stamp;      // text position Stamp() starts scanning from
    int         tabsize;    // tab width; Stamp() does nothing when below 1
};
void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
{
assert( now );
// Do nothing if the tabsize is 0.
if ( tabsize < 1 )
{
return;
}
// Get the current row, column.
int row = cursor.row;
int col = cursor.col;
const char* p = stamp;
assert( p );
while ( p < now )
{
// Treat p as unsigned, so we have a happy compiler.
const unsigned char* pU = (const unsigned char*)p;
// Code contributed by Fletcher Dunn: (modified by lee)
switch (*pU) {
case 0:
// We *should* never get here, but in case we do, don't
// advance past the terminating null character, ever
return;
case '\r':
// bump down to the next line
++row;
col = 0;
// Eat the character
++p;
// Check for \r\n sequence, and treat this as a single character
if (*p == '\n') {
++p;
}
break;
case '\n':
// bump down to the next line
++row;
col = 0;
// Eat the character
++p;
// Check for \n\r sequence, and treat this as a single
// character. (Yes, this bizarre thing does occur still
// on some arcane platforms...)
if (*p == '\r') {
++p;
}
break;
case '\t':
// Eat the character
++p;
// Skip to next tab stop
col = (col / tabsize + 1) * tabsize;
break;
case TIXML_UTF_LEAD_0:
if ( encoding == TIXML_ENCODING_UTF8 )
{
if ( *(p+1) && *(p+2) )
{
// In these cases, don't advance the column. These are
// 0-width spaces.
if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
p += 3;
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
p += 3;
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
p += 3;
else
{ p +=3; ++col; } // A normal character.
}
}
else
{
++p;
++col;
}
break;
default:
if ( encoding == TIXML_ENCODING_UTF8 )
{
// Eat the 1 to 4 byte utf8 character.
int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
if ( step == 0 )
step = 1; // Error case from bad encoding, but handle gracefully.
p += step;
// Just advance one column, of course.
++col;
}
else
{
++p;
++col;
}
break;
}
}
cursor.row = row;
cursor.col = col;
assert( cursor.row >= -1 );
assert( curs
tinyXML c++实现的xml解析器



TinyXML是一款轻量级的C++库,专为解析和操作XML文档而设计。XML(eXtensible Markup Language)是一种广泛应用于数据交换、配置存储和文档结构化的标记语言。TinyXML库以其小巧、易用的特点,使得在C++项目中处理XML文件变得简单。 TinyXML库的核心功能包括: 1. **解析XML文件**:TinyXML能够读取XML文件并将其内容转换为内存中的数据结构。它首先将XML文档解析成一系列的元素(Element)、属性(Attribute)和文本节点(Text Node),这些对象可以通过C++的对象接口进行访问。 2. **DOM模型**:TinyXML采用的是Document Object Model (DOM) 模型,这意味着整个XML文档被加载到内存中,允许开发者以任意顺序遍历和修改XML树。DOM模型提供了对XML文档的完整视图,但可能会消耗较多内存,尤其对于大型XML文件。 3. **元素(Element)**:在TinyXML中,每个XML元素对应一个`TiXmlElement`对象。元素有名称、属性和子元素。通过`FirstChildElement()`和`NextSiblingElement()`等方法,可以遍历元素的子元素。 4. **属性(Attribute)**:XML元素可以包含属性,这些属性在TinyXML中表示为`TiXmlAttribute`对象。`TiXmlElement`对象提供方法来获取或设置属性,如`Attribute()`用于获取属性值,`SetAttribute()`用于设置属性值。 5. **文本节点(Text Node)**:XML元素可以包含文本内容,这些内容由`TiXmlText`对象表示。`TiXmlElement`的`FirstChild()`和`LastChild()`方法可以访问元素内的文本。 6. **错误处理**:TinyXML提供了基本的错误处理机制,当解析过程中遇到无效的XML时,会抛出异常。开发者可以通过捕获`TiXmlException`来处理这些错误。 7. **内存管理**:TinyXML库使用C++的智能指针`TiXmlHandle`来简化内存管理。智能指针在不再需要时自动释放关联的对象,降低了内存泄漏的风险。 8. **源码结构**:TinyXML库由六个主要的源代码文件组成,包括`tinyxml.h`(主头文件)、`tinyxml.cpp`(实现文件)、`tinystr.h`(字符串类)、`tinystr.cpp`、`tinyxmlerror.cpp`(错误处理)和`tinyxmlparser.cpp`(解析器)。这些文件都是库的组成部分,可以根据需要进行编译和链接。 9. **使用示例**:在C++项目中,首先需要包含`tinyxml.h`,然后创建`TiXmlDocument`对象加载XML文件,通过该对象可以访问XML树的根元素,进而遍历整个文档结构。例如: ```cpp TiXmlDocument doc("test.xml"); if (!doc.LoadFile()) { // 处理加载失败 } TiXmlElement* root = doc.RootElement(); // 遍历和操作XML树 ``` TinyXML是C++开发中处理XML文档的理想选择,特别是对于那些需要快速原型开发或不希望引入大型XML解析库的项目。它的API直观且易于理解,使得XML解析成为一项轻松的任务。不过,对于大型或复杂的应用场景,可能需要考虑性能更优、功能更全面的库,如pugixml或Xerces-C++。








- 1

- #完美解决问题
- #运行顺畅
- #内容详尽
- #全网独家
- #注释完整
- qiushye2015-09-10不错,有报错的功能
- come欧尼2019-10-09可以使用哦。

- 粉丝: 2
- 资源: 3
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助


最新资源
- 2024年中国省、市、县驻地点位数据.zip
- 2011-2022年 省级-人均拥有公共图书馆藏数量.zip
- 基于MATLAB语音信号去噪实现(含GUI)
- matlab程序代码项目案例:使用 Simulink 进行自适应 MPC 设计
- 完整性访问控制系统-计算机系统安全实验
- 基于PCA算法的人脸识别MATLAB源码
- 大二计算机系统综合(SoC)实验的一些参考资料
- 基于Python+Pytorch的轴承故障分析(含CNN、SVM、KNN算法)
- 一个计算机系统实验课程-流水线CPU的设计
- 基于SIFT算法的图像拼接MATLAB源码(含GUI)
- 2023年 中国环境统计年鉴.zip
- 基于微信小程序的商场电子优惠券系统设计全套代码+数据库
- 2024年 专精特新中小企业基本信息库.zip
- 2000-2023年 上市公司-气候风险总词频、气候风险指数.zip
- 2000-2022年 上市公司-战略性新兴产业企业面板数据及企业名单.zip
- 2008-2023年 上市公司-企业研发操纵数据.zip


