#include "TUrlParser.h"
#pragma comment(lib,"ws2_32.lib")
// Constructs a parser for the given URL text.
// The text is only stored here; validation and splitting happen in Launch().
// pUrl: the URL to parse later; copied into sUrl.
TUrlParser::TUrlParser(const string pUrl):sUrl(pUrl) {
    // UrlSplit() is the only place that assigns uiPort, and Launch() skips it
    // for an invalid URL. Give the port a defined value so GetPort() never
    // reads an indeterminate unsigned int.
    uiPort = 0;
}
unsigned int TUrlParser::GetPort() {
return uiPort;
}
// Returns a copy of the host name stored in sHostname.
string TUrlParser::GetHostname() {
    return this->sHostname;
}
// Returns a copy of the domain stored in sDomain.
string TUrlParser::GetDomain() {
    return this->sDomain;
}
// Returns a copy of the protocol (scheme) stored in sProtocol.
string TUrlParser::GetProtocol() {
    return this->sProtocol;
}
// Returns a copy of the file path stored in sFilePath.
string TUrlParser::GetFilePath() {
    return this->sFilePath;
}
// Returns a copy of the raw (unsplit) request-parameter string stored in
// sRequestParameter.
string TUrlParser::GetRequestParameter() {
    return this->sRequestParameter;
}
// Returns a copy of the key/value request parameters stored in
// mpRequestParameters.
map<string, string> TUrlParser::GetRequestParameters() {
    return this->mpRequestParameters;
}
// Returns a copy of the fragment stored in sFragment.
string TUrlParser::GetFragment() {
    return this->sFragment;
}
// Returns a copy of the IP address strings collected in vtIPAddrs.
vector<string> TUrlParser::GetIPs() {
    return this->vtIPAddrs;
}
// Entry point of the parser: checks the stored URL with Isvalid() and, when it
// passes, splits it into its components with UrlSplit(). A URL that fails the
// check is reported on standard output and nothing is parsed.
void TUrlParser::Launch() {
    if (Isvalid()) {
        UrlSplit();
        return;
    }
    cout << "ERROR-->the url is invalid!" << endl;
}
bool TUrlParser::Isvalid() {
//(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|] from stackoverflow
regex tmp_stdUrl(STDURL); //the url pattern
return regex_match(sUrl,tmp_stdUrl);
}
void TUrlParser::UrlSplit() {
//scheme:[//[user[:password]@]host[:port]][/path][?query][#fragment]
string tmp_url=sUrl;
size_t tmp_pos=tmp_url.find_first_of("://");
sProtocol = tmp_url.substr(0, tmp_pos); //get the protocol
uiPort = strcmp(sProtocol.c_str(), "http") ? 443 : 80; //determine the port used
tmp_url = tmp_url.substr(tmp_pos+3);
tmp_pos = tmp_url.find_first_of("/");
if (tmp_pos == tmp_url.npos) { //if the url ends with hostname part
tmp_pos = tmp_url.find_first_of(":");
if (tmp_pos == tmp_url.npos) { //if without port in the url
sHostname = tmp_url;
}
else { //if with port in the url
sHostname = tmp_url.substr(0, tmp_pos);
uiPort = static_cast<unsigned int>(atoi(tmp_url.substr(tmp_pos + 1).c_str()));
}
size_t tmp_pos1 = sHostname.find_first_of(".");
sDomain = sHostname.substr(tmp_pos1 + 1); //get the domain
if (!DeriveIPByWinSock()) {
return;
} //get the IPs by using winsock
return;
}
else { //if the url contains request parameter part
string tmp_hostname = tmp_url.substr(0, tmp_pos);
tmp_url = tmp_url.substr(tmp_pos + 1);
tmp_pos = tmp_hostname.find_first_of(":");
if (tmp_pos == tmp_hostname.npos) { //if without port in the url
sHostname = tmp_hostname;
}
else { //if with port in the url
sHostname = tmp_hostname.substr(0, tmp_pos);
uiPort = static_cast<unsigned int>(atoi(tmp_hostname.substr(tmp_pos + 1).c_str()));
}
size_t tmp_pos1 = sHostname.find_first_of(".");
sDomain = sHostname.substr(tmp_pos1 + 1); //get the domain
if (!DeriveIPByWinSock()) {
return;
} //get the IPs by using winsock
}
tmp_pos = tmp_url.find_first_of("?");
if (tmp_pos==tmp_url.npos) { //if the url ends with file path
sFilePath = tmp_url; //get the file path
return;
}
else { //if the url contains request parameters
sFilePath = tmp_url.substr(0,tmp_pos); //get the file path
tmp_url = tmp_url.substr(tmp_pos+1);
}
tmp_pos = tmp_url.find_first_of("#");
if (tmp_pos==tmp_url.npos) { //if the url ends with the request parameter
sRequestParameter = tmp_url; //get the request parameter string
//split the request parameters into key-value form
ParameterSplit();
return;
}
else { //if the url contains fragment part
sRequestParameter = tmp_url.substr(0,tmp_pos);
//split the request parameters into key-value form
ParameterSplit();
sFragment = tmp_url.substr(tmp_pos+1); //get the remain part
}
}
bool TUrlParser::DeriveIPByWinSock() {
WSADATA wsaData;
if (WSAStartup(MAKEWORD(2, 2), &wsaData)==SOCKET_ERROR) {
cout << "ERROR->Failed to initialize the windows socket!" << endl;
return false;
}
hostent* pHost = gethostbyname(sHostname.c_str());
if (pHost == nullptr) {
cout << "ERROR->This is probably an invalid url!" << endl;
return false;
}
for (int i = 0; pHost->h_addr_list[i] != nullptr;i++) {
//cout << inet_ntoa(*(struct in_addr*)pHost->h_addr_list[i]) << endl; //get ips
vtIPAddrs.push_back(inet_ntoa(*(struct in_addr*)pHost->h_addr_list[i]));
}
return true;
}
void TUrlParser::ParameterSplit() {
string tmp_key, tmp_value;
size_t tmp_pos;
string tmp_parameters = sRequestParameter;
while (true) {
tmp_pos = tmp_parameters.find_first_of("&");
if (tmp_pos == tmp_parameters.npos) { //if no key-value any more
tmp_pos = tmp_parameters.find_first_of("=");
tmp_key = tmp_parameters.substr(0,tmp_pos);
tmp_value = tmp_parameters.substr(tmp_pos+1);
mpRequestParameters.insert(make_pair(tmp_key,tmp_value));
break;
}
else {
string tmp_parameter = tmp_parameters.substr(0,tmp_pos);
tmp_parameters = tmp_parameters.substr(tmp_pos + 1);
tmp_pos = tmp_parameter.find_first_of("=");
tmp_key = tmp_parameter.substr(0, tmp_pos);
tmp_value = tmp_parameter.substr(tmp_pos + 1);
mpRequestParameters.insert(make_pair(tmp_key, tmp_value));
}
}
}
C++实现简单的url解析
版权申诉
5星 · 超过95%的资源 23 浏览量
2021-02-17
08:25:16
上传
评论
收藏 3KB RAR 举报
mylyt
- 粉丝: 1
- 资源: 2