#include "StdAfx.h"
#include "DomTree.h"
#include <algorithm>
// Case-insensitive substring search.
//
// Returns a pointer to the first position in str1 at which str2 occurs when
// both are compared character by character through tolower(), or NULL when
// there is no such position.
//
// NULL is also returned when either argument is NULL or an empty string
// (note: unlike strstr, an empty str2 does NOT match).
const char * stristr(const char * str1, const char * str2)
{
    if (str1 == NULL || str2 == NULL) return NULL;

    const size_t hayLen = strlen(str1);
    const size_t needleLen = strlen(str2);
    if (hayLen == 0 || needleLen == 0 || hayLen < needleLen) return NULL;

    // Last index in str1 at which a full-length match can still start.
    const size_t lastStart = hayLen - needleLen;
    for (size_t start = 0; start <= lastStart; ++start)
    {
        size_t matched = 0;
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined, hence the casts.
        while (matched < needleLen &&
               tolower(static_cast<unsigned char>(str1[start + matched])) ==
               tolower(static_cast<unsigned char>(str2[matched])))
        {
            ++matched;
        }
        if (matched == needleLen) return str1 + start;
    }
    return NULL;
}
// Extracts a tag name from the text of a tag.
//
// buff : NUL-terminated text positioned at or before the tag name
//        (e.g. "<div ...>" or "/div>").
// str  : receives the first run of ASCII letters found before the first '>'.
//        It is set to an empty string when there is no letter before '>'.
//        It is left untouched when the function returns false.
//
// Returns false when buff is NULL or contains no '>', true otherwise.
//
// Only A-Z / a-z are treated as name characters, so a digit ends the name
// (for "<h1>" the result is "h").
bool GetTagType(const char * buff,std::string & str)
{
    if(!buff) return false;
    const char * close = strstr(buff,">");
    if(!close) return false;

    // The scan is confined to [buff, close): it can neither run past the end
    // of the text nor collect more characters than precede the '>'.
    const char * cur = buff;
    while (cur < close &&
           !((*cur >= 'A' && *cur <= 'Z') || (*cur >= 'a' && *cur <= 'z')))
    {
        ++cur;
    }
    const char * nameStart = cur;
    while (cur < close &&
           ((*cur >= 'A' && *cur <= 'Z') || (*cur >= 'a' && *cur <= 'z')))
    {
        ++cur;
    }
    str.assign(nameStart,static_cast<size_t>(cur - nameStart));
    return true;
}
// Creates an empty tree: no document buffer, no root node, no nodes counted.
CDomTree::CDomTree(void)
{
    m_pBuff = NULL;
    m_proot = NULL;
    // Give the counter a defined value before the first LoadTree() call.
    m_nNodeCount = 0;
}
// Releases the node tree and the private copy of the document text.
CDomTree::~CDomTree(void)
{
    // m_proot stays NULL when LoadTree() was never called; DeleteNode()
    // dereferences its argument, so it must not be handed a NULL root.
    if(m_proot)
    {
        DeleteNode(m_proot);
        m_proot = NULL;
    }
    // delete [] on a NULL pointer is a no-op, so no check is needed here.
    delete [] m_pBuff;
    m_pBuff = NULL;
}
bool CDomTree::LoadTree(char * buff)
{
ASSERT(buff);
if(m_pBuff)
{
delete [] m_pBuff;
m_pBuff = NULL;
}
int len = strlen(buff) + 1;
m_pBuff = new char [len];
memset(m_pBuff,0,sizeof(char) * len);
strcpy(m_pBuff,buff);
if (m_proot)
{
DeleteNode(m_proot);
}
CleanTag();
m_nNodeCount = 0;
m_proot = new CNode;
char * pos = strstr(m_pBuff,"<");
if(pos)
{
m_proot->SetStart(pos);
m_proot->SetEnd(m_pBuff + strlen(m_pBuff));
Adapter(++pos,m_proot);
return true;
}
else
{
return false;
}
}
// Overwrites with spaces every region of m_pBuff that should not be parsed as
// markup: <script>...</script>, <style>...</style>, <!-- ... --> comments and
// <! ... > declarations such as <!DOCTYPE ...>.
//
// The text length is unchanged, so pointers into the buffer stay valid.
// When a region has no terminator, everything from its opener to the end of
// the text is blanked.
void CDomTree::CleanTag()
{
    if(!m_pBuff) return;

    // Comments are listed before the generic "<!" form so that a '>' inside a
    // comment does not end the blanked region early.
    static const char * const tag [4][2] =
    {
        {"<script","</script>"},
        {"<style","</style>"},
        {"<!--","-->"},
        {"<!",">"}
    };
    for (int i = 0;i < 4; i++)
    {
        const char * tags = tag[i][0];
        const char * tage = tag[i][1];
        // Resume after the region just blanked instead of rescanning the
        // whole buffer on every iteration.
        char * cursor = m_pBuff;
        while(true)
        {
            const char * pos = stristr(cursor,tags);
            if(!pos) break;
            const char * pose = stristr(pos,tage);
            size_t len = 0;
            if(!pose)
                len = strlen(pos);
            else
                len = static_cast<size_t>(pose - pos) + strlen(tage);
            // Write through the owning non-const pointer rather than casting
            // away const; len never reaches past the terminating NUL.
            char * dst = m_pBuff + (pos - m_pBuff);
            memset(dst,' ',len);
            cursor = dst + len;
        }
    }
}
// Deletes node and, recursively, every node reachable through GetChildNode().
// Children are deleted before their parent. A NULL node is ignored.
void CDomTree::DeleteNode(CNode * node)
{
    if(!node) return;
    // GetChildNode() returns a copy, so the list stays valid while the
    // children are being deleted.
    const list<CNode *> lsnode = node->GetChildNode();
    for (list<CNode *>::const_iterator itr = lsnode.begin();itr != lsnode.end();++itr)
    {
        DeleteNode(*itr);
    }
    delete node;
}
// Starting at node and walking up through GetParent(), returns the first node
// whose GetLength() is 0 and whose GetNodeType() equals szTag ignoring case.
// Returns NULL when the chain ends without a match, or when node or szTag is
// NULL.
CNode * CDomTree::CheckParentNode(const CNode * node,const char * szTag)
{
    if(!szTag) return NULL;
    for (const CNode * cur = node; cur != NULL; cur = cur->GetParent())
    {
        // Nodes with a non-zero length are skipped.
        if (cur->GetLength() != 0) continue;
        if (stricmp(cur->GetNodeType().c_str(),szTag) == 0)
        {
            return const_cast<CNode *>(cur);
        }
    }
    return NULL;
}
// Returns copies of all nodes in m_lsNode whose GetId() equals szId,
// compared without regard to case, in list order.
list<CNode> CDomTree::GetTagById(const char * szId)const
{
    list<CNode> matches;
    list<CNode *>::const_iterator cur = m_lsNode.begin();
    while (cur != m_lsNode.end())
    {
        const CNode * candidate = *cur;
        const string id = candidate->GetId();
        if (stricmp(szId,id.c_str()) == 0)
        {
            matches.push_back(*candidate);
        }
        ++cur;
    }
    return matches;
}
// Returns copies of all nodes in m_lsNode whose GetName() equals szName,
// compared without regard to case, in list order.
list<CNode> CDomTree::GetTagByName(const char * szName)const
{
    list<CNode> matches;
    list<CNode *>::const_iterator cur = m_lsNode.begin();
    while (cur != m_lsNode.end())
    {
        const CNode * candidate = *cur;
        const string name = candidate->GetName();
        if (stricmp(szName,name.c_str()) == 0)
        {
            matches.push_back(*candidate);
        }
        ++cur;
    }
    return matches;
}
// Returns copies of all nodes in m_lsNode whose GetNodeType() equals szType,
// compared without regard to case, in list order.
list<CNode> CDomTree::GetTagByType(const char * szType)const
{
    list<CNode> matches;
    list<CNode *>::const_iterator cur = m_lsNode.begin();
    while (cur != m_lsNode.end())
    {
        const CNode * candidate = *cur;
        const string type = candidate->GetNodeType();
        if (stricmp(szType,type.c_str()) == 0)
        {
            matches.push_back(*candidate);
        }
        ++cur;
    }
    return matches;
}
//list<string> CDomTree::GetHerf(list<CNode> & lsNode) const
//{
//
//}
// Builds the node tree from the text starting at buff, attaching nodes
// beneath pnode.
//
// For each '<' found:
//  - "</name>": CheckParentNode() searches from the current node upwards for
//    a node with that name and zero length; its end is set to the closing
//    '>' and parsing continues under its parent.
//  - anything else: a new CNode is created as a child of the current node,
//    recorded in m_lsNode, and becomes the current node.
//
// Parsing stops at the end of the text, at a '<' that is the last character,
// at a closing tag with no '>', at a closing tag for which CheckParentNode()
// finds no node, or when the closed node has no parent.
//
// Written as a loop rather than recursion so that stack use does not grow
// with the number of tags in the document.
void CDomTree::Adapter(const char * buff,CNode * pnode)
{
    ASSERT(buff);
    while (buff && pnode)
    {
        const char * pos = strstr(buff,"<");// first check whether there is a '<' at all
        if(!pos) return;
        // A '<' with nothing after it cannot start a tag.
        if(pos[1] == '\0') return;
        const char * pNext = pos + 1;
        if(*pNext == '/')
        {
            const char * close = strstr(pos,">");
            // An unterminated closing tag has no end position to record.
            if(!close) return;
            string str = "";
            ::GetTagType(pNext,str);
            // Climb the parent chain to find which ancestor has no end yet.
            CNode * ppnode = CheckParentNode(pnode,str.c_str());
            if(!ppnode) return;
            ppnode->SetEnd(close);// record where the node ends
            // Continue after the '>' under the closed node's parent; the loop
            // condition stops when that parent is NULL.
            buff = close + 1;
            pnode = ppnode->GetParent();
        }
        else
        {
            CNode * node = new CNode;
            m_lsNode.push_back(node);
            node->SetStart(pos);
            node->SetParent(pnode);
            m_nNodeCount ++;
            pnode->AddChildNode(node);
            // Following tags are parsed as children of the new node.
            buff = pNext;
            pnode = node;
        }
    }
}
// Returns the double-quoted value that follows szType inside this node's
// opening tag.
//
// szType : text to look for, e.g. "id=". It is located with stristr(), so
//          letter case does not matter.
//
// The opening tag is the text from m_pStart up to (not including) the first
// '>'. When there is no '>', at most m_nLen characters are used.
// After the match, characters between the next pair of '"' are returned with
// every space removed. When the closing '"' is missing, everything collected
// up to the end of the opening tag is returned.
// Returns "" when szType is not found, when no '"' follows it, or when
// m_pStart or szType is NULL.
string CNode::GetValue(const char * szType) const
{
    if(!m_pStart || !szType) return "";

    const char * close = strstr(m_pStart,">");
    size_t tagLen = 0;
    if(close)
    {
        tagLen = static_cast<size_t>(close - m_pStart);
    }
    else
    {
        // No '>' in the text: never read past the terminating NUL.
        const size_t avail = strlen(m_pStart);
        const size_t limit = m_nLen > 0 ? static_cast<size_t>(m_nLen) : 0;
        tagLen = avail < limit ? avail : limit;
    }
    // A std::string copy is always NUL-terminated, so the search below cannot
    // run off the end of the copy.
    const string tag(m_pStart,tagLen);

    const char * pos = stristr(tag.c_str(),szType);
    if(!pos) return "";

    string value;
    bool bCopy = false;
    for (pos += strlen(szType); *pos != '\0'; ++pos)
    {
        if(*pos == '\"')
        {
            if(bCopy) break;   // closing quote
            bCopy = true;      // opening quote
            continue;
        }
        if(bCopy && *pos != ' ')
        {
            value.push_back(*pos);
        }
    }
    return value;
}
// Returns GetValue("id=") for this node.
string CNode::GetId() const
{
    string id = GetValue("id=");
    return id;
}
// Returns GetValue("name=") for this node.
string CNode::GetName() const
{
    string name = GetValue("name=");
    return name;
}
// Returns m_nLen, the length last computed by SetEnd().
int CNode::GetLength() const
{
    const int length = m_nLen;
    return length;
}
void CNode::SetParent(const CNode * pParent)
{
m_pParent = (CNode *)pParent;
}
void CNode::AddChildNode(const CNode * node)
{
m_lsChildNode.push_back((CNode * )node);
}
// Returns the number of entries in this node's child list.
int CNode::GetChildCount() const
{
    return static_cast<int>(m_lsChildNode.size());
}
// Returns a copy of this node's child pointer list.
list<CNode *> CNode::GetChildNode() const
{
    list<CNode *> children(m_lsChildNode);
    return children;
}
// Returns the parent pointer stored by SetParent().
CNode * CNode::GetParent() const
{
    CNode * const parent = m_pParent;
    return parent;
}
// Returns the text of GetInnerHtml() with markup removed: only characters
// that come after a '>' and before the next '<' are kept, and spaces are
// dropped. Text before the first '>' is not included.
string CNode::GetInnerText() const
{
    const string html = GetInnerHtml();
    string text;
    bool outsideTag = false;
    for (const char * cur = html.c_str(); *cur != '\0'; ++cur)
    {
        switch (*cur)
        {
        case '>':
            outsideTag = true;
            break;
        case '<':
            outsideTag = false;
            break;
        default:
            if (outsideTag && *cur != ' ')
            {
                text.push_back(*cur);
            }
            break;
        }
    }
    return text;
}
// Returns the node's source text: up to m_nLen characters starting at
// m_pStart, cut short at a NUL if one comes first.
// Returns an empty string when m_pStart is NULL or m_nLen is not positive.
string CNode::GetInnerHtml() const
{
    ASSERT(m_pStart);
    if(!m_pStart || m_nLen <= 0) return string();

    // Count the characters to copy without reading past a terminating NUL.
    const size_t limit = static_cast<size_t>(m_nLen);
    size_t count = 0;
    while (count < limit && m_pStart[count] != '\0')
    {
        ++count;
    }
    // Building the string directly avoids a temporary new[]/delete[] buffer.
    return string(m_pStart,count);
}
// Returns the node's tag name.
//
// When m_nLen is non-zero, the name is taken from the end of GetInnerHtml():
// every character after the last '/' except spaces and '>'.
// When m_nLen is zero, or the text contains no '/', the name is read from the
// start of the node with ::GetTagType().
// Returns "" when no name can be determined.
string CNode::GetNodeType() const
{
    string str = "";
    if(m_nLen != 0)
    {
        const string strHtml = GetInnerHtml();
        // rfind keeps the search inside the string; a backwards pointer walk
        // would run before the start of the text when there is no '/'.
        const string::size_type slash = strHtml.rfind('/');
        if(slash != string::npos)
        {
            for (string::size_type i = slash + 1; i < strHtml.length(); ++i)
            {
                const char ch = strHtml[i];
                if(ch != ' ' && ch != '>') str.push_back(ch);
            }
            return str;
        }
    }
    if(m_pStart)
    {
        ::GetTagType(m_pStart,str);
    }
    return str;
}
// Records pstart as the position where this node's text begins. The pointer
// is kept as non-const.
void CNode::SetStart(const char * pstart)
{
    char * start = const_cast<char *>(pstart);
    m_pStart = start;
}
// Sets the node length to the distance from m_pStart to pend, inclusive of
// the character at pend. In DEBUG builds pend itself is also stored.
void CNode::SetEnd(const char * pend)
{
    m_nLen = (pend - m_pStart) + 1;
#ifdef DEBUG
    m_pEnd = const_cast<char *>(pend);
#endif
}
void CNode::GetTagById(const char * szId,list<CNode> & ls)const
{
if(stricmp(szId,GetId().c_str()) == 0) ls.push_back(*this);
for(list<CNode *>::const_iterator itr = m_lsChildNode.begin();
itr != m_lsChildNode.end();itr ++)
{
CNode * pNode = (*itr);
pNode->GetTagById(szId,ls);
}
}
void C
- 1
- 2
前往页