tinyxml2数组_tinyxml源码解析(上)--688IT编程网

tinyxml2数组_tinyxml源码解析（上）

前⾔：

前段时间做功能可行性验证的时候简单地使用了tinyxml。在sourceforge下载的时候，看到老外对于这个库的评价很高，因此就有了学习这个优秀框架的冲动。简单查看了代码，代码量也不是很大。刚好事情做得差不多，索性就学习学习好了。第一次尝试阅读正式的框架，水平也不是很好，可能多少会有点出错，要是有错误还望多包涵并且提出来，一起进步哈。

文章打算分为三部分：第一部分介绍TiXmlBase、TiXmlNode这两个基类，第二部分详细介绍代表节点的几个类，第三部分介绍库中提供的一些工具类。

正⾔：

先简单介绍下tinyxml吧。我在前一篇文章中也介绍过它以及它的使用方法。tinyxml是一款轻量级的xml parser。其目的在于在提供基本的xml操作的同时，保持一个简单的接口供用户使用。它能实现基本的xml特性，如遍历xml文件、获取节点等。至于dtd、xslt等特性则并不支持。详细可以查看我的上一篇文章。

tinyxml整体的类的架构图如下：

TiXmlBase是tinyxml里面所有元素的基类，其提供了一些基本的操作和属性，稍后分析。

xml中所有节点的父类皆为TiXmlNode，其可以转化成任意一种特定的节点。而对于element节点中的属性来说，其在构造时也是作为element的子节点来构建的，不过由于其只能出现在element中，因此并没有继承自TiXmlNode。

TiXmlNode也提供了⼀些基本的节点操作，如获取⼦节点，插⼊⼦节点等。

这篇⽂章将主要讲解TiXmlBase以及TiXmlNode。

TiXmlBase：

先讲下所有类的基类TiXmlBase吧。

这⾥是头⽂件的内容。

View Code

1 //TiXmlBase，所有的基类，提供了⼀些基本的操作和元素

2 classTiXmlBase

3 {

4 friend class TiXmlNode;//这⾥使⽤friend让⼈⽐较困惑，因为这三个都是⼦类，应该不⽤才是.在查看了tinyxmlparser.cpp的679⾏，主要是因为在函数中调⽤了做为参数的TiXmlNode的streamIn函数,⽽该函数是protected的，所以需要friend.当然肯定不⽌这么⼀个函数，应该⽤到了很多protected内的⽅法和参数.

5 friend class TiXmlElement;//这边就是同理了.

6 friend classTiXmlDocument;7

8 public:9 TiXmlBase() : userData(0) {}10 virtual ~TiXmlBase() {}11

12 /** Print是带格式输出，如果想不带格式可以直接使⽤<<13 */

14 virtual void Print( FILE* cfile, int depth ) const = 0;15

16 /** 由于xml中空格是否算元素还未定，因此作者留了个选项供⽤户选择17 */

18 static void SetCondenseWhiteSpace( bool condense ) { condenseWhiteSpace =condense; }19

20 static bool IsWhiteSpaceCondensed() { returncondenseWhiteSpace; }21

22 /** 返回的值为该元素在文件中的位置23 注意：增加或者删除节点并不会改变位置，这个位置实际上是在load的时候确定的.24 同时，计算位置有一点的性能损失，可以选择关闭25 */

26 int Row() const { return location.row + 1; }27 int Column() const { return location.col + 1; } ///< See Row()28

29 void SetUserData( void* user ) { userData = user; } ///< Set a pointer to arbitrary user data.30 void* GetUserData() { return userData; }///< Get a pointer to arbitrary user data.31 const void* GetUserData() const { return userData; }///< Get a pointer to arbitrary user data.32

33 //⽤来判断UTF-8字符集字符长度的数组(1~4)，具体定义在tinyxmlparser.cpp中

34 static const int utf8ByteTable[256];35 //纯虚函数，在⼦类中会重新定义，对不同节点进⾏不同处理，纯虚函数也使得该类为虚基类，不可⽣成变量

36 virtual const char* Parse( const char*p,37 TiXmlParsingData*data,38 TiXmlEncoding encoding /*=

TIXML_ENCODING_UNKNOWN*/ ) = 0;39

40 /** 工具函数，将节点值encode成相应的xml，其实就是对于&、<、>、引号等xml特殊字符进行处理41 */

42 static void EncodeString( const TIXML_STRING& str, TIXML_STRING* out);43 //定义了⼀些转化的时候的错误.

44 enum

45 {46 TIXML_NO_ERROR = 0,47 TIXML_ERROR,48 TIXML_ERROR_OPENING_FILE,49

TIXML_ERROR_PARSING_ELEMENT,50 TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME,51

TIXML_ERROR_READING_ELEMENT_VALUE,52 TIXML_ERROR_READING_ATTRIBUTES,53

TIXML_ERROR_PARSING_EMPTY,54 TIXML_ERROR_READING_END_TAG,55 TIXML_ERROR_PARSING_UNKNOWN,56 TIXML_ERROR_PARSING_COMMENT,57 TIXML_ERROR_PARSING_DECLARATION,58

TIXML_ERROR_DOCUMENT_EMPTY,59 TIXML_ERROR_EMBEDDED_NULL,60 TIXML_ERROR_PARSING_CDATA,61

TIXML_ERROR_DOCUMENT_TOP_ONLY,62

63 TIXML_ERROR_STRING_COUNT64 };65

66 protected:67 //⼯具函数，使之在处理⽂本的时候跳过空格.tinyxml中很多处理只针对english,latin,⽐如符号，字符等.不过通常情况下，这种处理结果还不错

isalpha 函数

68 static const char* SkipWhiteSpace( const char*, TiXmlEncoding encoding );69

70 inline static bool IsWhiteSpace( charc )71 {72 return ( isspace( (unsigned char) c ) || c == '\n' || c == '\r');73 }74 inline static bool IsWhiteSpace( intc )75 {76 if ( c < 256)77 return IsWhiteSpace( (char) c );78 return false; //Again, only truly correct for but usually works.

79 }80 //由于tinyxml对于⼀些不使⽤stl的⽤户也提供了接⼝，因此这样的宏很常见，也增加了不少接⼝.

81 #ifdef TIXML_USE_STL82 static bool StreamWhiteSpace( std::istream * in, TIXML_STRING *tag );83 static bool

StreamTo( std::istream * in, int character, TIXML_STRING *tag );84 #endif

86 /*⼯具函数,⽤来读取p中的名字,⽐如tagname，属性名，属性值等.87 */

88 static const char* ReadName( const char* p, TIXML_STRING*name, TiXmlEncoding encoding );89

90 /*读取in中的⽂本，即<> >中的⽂本,91 */

92 static const char* ReadText( const char* in, //where to start

93 TIXML_STRING* text, //the string read

94 bool ignoreWhiteSpace, //whether to keep the white space

95 const char* endTag, //what ends this text

96 bool ignoreCase, //whether to ignore case in the end tag

97 TiXmlEncoding encoding ); //the current encoding98

99 //处理xml中的实体，将&amp;、&lt;、&gt;等实体转化回对应的字符，实现中用了些trick，之后展现

100 static const char* GetEntity( const char* in, char* value, int*length, TiXmlEncoding encoding );101

102 //转化⼀个char，如果是实体，同时进⾏转化，在ReadText中被调⽤

103 inline static const char* GetChar( const char* p, char* _value, int*length, TiXmlEncoding encoding )104 {105 assert( p );106 if ( encoding ==TIXML_ENCODING_UTF8 )107 {108 *length = utf8ByteTable[ *((const unsigned char*)p) ];109 assert( *length >= 0 && *length < 5);110 }111 else

112 {113 *length = 1;114 }115

116 if ( *length == 1)117 {118 if ( *p == '&')119 returnGetEntity( p, _value, length, encoding );120 *_value = *p;121 return p+1;122 }123 else if ( *length )124 {125 //strncpy( _value, p, *length );//lots of compilers don't like this function (unsafe),126 //and the null terminator isn't needed

127 for( int i=0; p[i] && i

133 {134 //Not valid text.

135 return 0;136 }137 }138

139 //进⾏⽐较,ignoreCase只针对英⽂⽀持.

140 static bool StringEqual( const char*p,141 const char*endTag,142 boolignoreCase,143 TiXmlEncoding encoding );144

145 static const char*errorString[ TIXML_ERROR_STRING_COUNT ];146 //TiXmlCursor 提供了位置信息，是⼀个包涵

row,column的结构体

147 TiXmlCursor location;148

149 ///Field containing a generic user pointer

150 void*userData;151

152 //三个函数只针对英⽂有⽤.⼯具函数

153 static int IsAlpha( unsigned charanyByte, TiXmlEncoding encoding );154 static int IsAlphaNum( unsigned charanyByte, TiXmlEncoding encoding );155 inline static int ToLower( intv, TiXmlEncoding encoding )156 {157 if ( encoding

==TIXML_ENCODING_UTF8 )158 {159 if ( v < 128 ) returntolower( v );160 returnv;161 }162 else

163 {164 returntolower( v );165 }166 }167 //UTF32到UTF8的转化函数，挺复杂的，我没有研究，稍后展⽰.

168 static void ConvertUTF32ToUTF8( unsigned long input, char* output, int*length );169

170 private:171 TiXmlBase( const TiXmlBase& ); //防⽌复制

172 void operator=( const TiXmlBase& base ); //防⽌复制173

174 //定义了⼀些实体，在ReadText中⽤于转化

175 structEntity176 {177 const char*str;178 unsigned intstrLength;179 charchr;180 };181 enum

182 {183 NUM_ENTITY = 5,184 MAX_ENTITY_LENGTH = 6

185

186 };187 static Entity entity[ NUM_ENTITY ];//在tinyxmlparser.cpp中定义了.

188 static bool condenseWhiteSpace;//是否跳过空格

189 };

读完头⽂件，来看看具体的实现

⾸先是之前提到的entity，定义在tinyxmlparser.cpp

//定义了xml中的entity数组.⽤于函数转化

TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] ={

{"&amp;", 5, '&'},

{"&lt;", 4, '<'},

{"&gt;", 4, '>'},

{"&quot;", 6, '\"'},

{"&apos;", 6, '\''}

};

接下来就是说到的EncodeString了.

View Code

1 //对xml中实体进⾏转化,TIXML_STRING是作者封的⼀个宏.当定义了TIXML_USE_STL时，其即为string，否则就是作者⾃⼰实现的⼀个类.之后在讲解⼯具类时会提到.

2 void TiXmlBase::EncodeString( const TIXML_STRING& str, TIXML_STRING*outString )

3 {

4 int i=0;5

6 while( i

10 if ( c == '&'

11 && i < ( (int)str.length() - 2)12 && str[i+1] == '#'

13 && str[i+2] == 'x' )//为16进制的字符，直接复制.直到遇到';'

14 {15 while ( iappend( str.c_str() + i, 1);18 ++i;19 if ( str[i] == ';')20 break;21 }22 }23 //转化字符为数组中的实体.

24 else if ( c == '&')25 {26 outString->append( entity[0].str, entity[0].strLength );27 ++i;28 }29 else if ( c == 'append(

entity[1].str, entity[1].strLength );32 ++i;33 }34 else if ( c == '>')35 {36 outString->append( entity[2].str, entity[2].strLength );37 ++i;38 }39 else if ( c == '\"')40 {41 outString->append( entity[3].str, entity[3].strLength );42 ++i;43 }44 else if ( c ==

'\'')45 {46 outString->append( entity[4].str, entity[4].strLength );47 ++i;48 }49 //是特殊不显⽰字符，扩展

50 else if ( c < 32)51 {52 //Easy pass at non-alpha/numeric/symbol53 //Below 32 is symbolic.

54 char buf[ 32];55 //取低8位进⾏转化,TIXML_SNPRINTF是宏,56 //在tinyxml.h 62⾏中定义，主要是为了解决编译器兼容性问题

58 #if defined(TIXML_SNPRINTF)

59 TIXML_SNPRINTF( buf, sizeof(buf), "&#x%02X;", (unsigned) ( c & 0xff) );60 #else

61 sprintf( buf, "&#x%02X;", (unsigned) ( c & 0xff) );62 #endif

64 //*ME: warning C4267: convert 'size_t' to 'int'65 //*ME: Int-Cast to make compiler happy ...

66 outString->append( buf, (int)strlen( buf ) );67 ++i;68 }69 else

70 {71 //直接复制

72 *outString += (char) c;73 ++i;74 }75 }76 }

SkipWhiteSpace：这个实在没什么好说的.

1 //折叠⼜打不开了.就不折叠了

2 const char* TiXmlBase::SkipWhiteSpace( const char*p, TiXmlEncoding encoding )

3 {

4 if ( !p || !*p )

5 {

6 return 0;

7 }

8 if ( encoding ==TIXML_ENCODING_UTF8 )

9 {10 while ( *p )11 {12 const unsigned char* pU = (const unsigned char*)p;13

14 //跳过ms的UTF8头

15 if ( *(pU+0)==TIXML_UTF_LEAD_016 && *(pU+1)==TIXML_UTF_LEAD_117 && *(pU+2)==TIXML_UTF_LEAD_2 )18 {19 p += 3;20 continue;21 }22 else if(*(pU+0)==TIXML_UTF_LEAD_023 && *(pU+1)==0xbfU

24 && *(pU+2)==0xbeU)25 {26 p += 3;27 continue;28 }29 else if(*(pU+0)==TIXML_UTF_LEAD_030 && *(pU+1)==0xbfU

31 && *(pU+2)==0xbfU)32 {33 p += 3;34 continue;35 }36

37 //仍然采⽤了英⽂的,ansi的⽅式进⾏判断.

38 if ( IsWhiteSpace( *p ) )39 ++p;40 else

41 break;42 }43 }44 else

45 {46 //对于其他编码也是

47 while ( *p && IsWhiteSpace( *p ) )48 ++p;49 }50

51 returnp;52 }

ReadName

View Code

1 //读取element中的tagname和属性的name，写得很清晰，不⽤怎么解释

2 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING *name, TiXmlEncoding encoding )

3 {

4 *name = "";

5 assert( p );6

7 if ( p && *p8 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_') )9 {10 const char* start =p;11 while( p && *p12 && ( IsAlphaNum( (unsigned char ) *p, encoding )13 || *p == '_'

14 || *p == '-'

15 || *p == '.'

16 || *p == ':') )17 {18 //(*name) += *p;//while中的性能优化，将复制提到外头去.

19 ++p;20 }21 if ( p-start > 0) {22 name->assign( start, p-start );23 }24 returnp;25 }26 return 0;27 }

ReadText: 读取属性中的值，以及读取开始标签与结束标签（如<tag>与</tag>）之间的文本.

View Code

1 const char* TiXmlBase::ReadText( const char* p, //被读的⽂本

2 TIXML_STRING * text, //读取的字符

3 bool trimWhiteSpace, //是否要跳过空格

4 const char* endTag, //结束的tag，看起来⽐较别扭，不过⽤起来很⽅便

5 bool caseInsensitive, //判断tag相等，是否关注⼤⼩写

6 TiXmlEncoding encoding )

7 {

8 *text = "";

9 if ( !trimWhiteSpace //certain tags always keep whitespace

10 || !condenseWhiteSpace )11 {12 //Keep all the white space.

13 while ( p && *p14 && !StringEqual( p, endTag, caseInsensitive, encoding )15 )16 {17 intlen;18 char cArr[4] = { 0, 0, 0, 0};19 p = GetChar( p, cArr, &len, encoding );//详细看GetChar，这⾥取出的char已经是对实体进⾏了相应转化了

688IT编程网

tinyxml2数组_tinyxml源码解析(上)

发表评论

推荐文章

java正则表达式选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符回溯引用和前后查匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式选择题

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

688IT编程网

tinyxml2数组_tinyxml源码解析(上)

发表评论

推荐文章

java正则表达式 选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符 回溯引用和前后查 匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式 选择题

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

java正则表达式选择题

非零金额正则表达式

基本的元字符回溯引用和前后查匹配模式

java正则表达式选择题

非零金额正则表达式