<?php
/*
 * Entry point: download the company search-result page, follow every
 * "联系方式" link found on it, extract the contact fields from each linked
 * page and hand them to dbconnectInsertData().
 */
$urlstr = vita_get_url_content("http://search.china.alibaba.com/company/k-%BB%AF%D7%B1%D0%D0%D2%B5_n-y.html");
// The downloaded pages are treated as GBK and converted to UTF-8 before matching.
// A failed download or conversion is carried forward as false and simply yields no links.
$urlstr = is_string($urlstr) ? iconv("GBK","UTF-8",$urlstr) : false;

$result = array(1 => array());
if (is_string($urlstr)) {
    preg_match_all("/<a href=\"(.*?)\".*?class=\"syscat\">联系方式<\/a>/im",$urlstr,$result,PREG_PATTERN_ORDER);
}
$contact = isset($result[1]) ? $result[1] : array();

// One pattern per optional field; the first capture group holds the value.
$fieldPatterns = array(
    'tel'    => "/<ul class=\"mainTextColor\">.*?<li >电.*?话:(.*?)<\/li>/sim",
    'mobile' => "/<ul class=\"mainTextColor\">.*?<li >移动电话:(.*?)<\/li>/sim",
    'fax'    => "/<ul class=\"mainTextColor\">.*?<li >传.*?真:(.*?)<\/li>/sim",
    'addr'   => "/<ul class=\"mainTextColor\">.*?<li >地.*?址:(.*?)<\/li>/sim",
);

foreach ($contact as $tmpurl) {
    $contactHtml = vita_get_url_content($tmpurl);
    if (!is_string($contactHtml)) {
        continue; // download failed, nothing to parse
    }
    $contactHtml = iconv("GBK","UTF-8",$contactHtml);
    if (!is_string($contactHtml)) {
        continue; // conversion failed, nothing to parse
    }

    // A record is only stored when the company-name element is present on the page.
    if (preg_match("/<span id=\"chinaname\" class=\"chinaname\">(.*?)<\/span>/sim",$contactHtml,$nameMatch) !== 1) {
        continue;
    }
    $companyName = isset($nameMatch[1]) ? $nameMatch[1] : '';

    // Fields missing from the page are stored as empty strings instead of
    // relying on undefined array offsets.
    $fields = array();
    foreach ($fieldPatterns as $field => $pattern) {
        $fieldMatch = array();
        $found = preg_match($pattern,$contactHtml,$fieldMatch) === 1;
        $fields[$field] = ($found && isset($fieldMatch[1])) ? $fieldMatch[1] : '';
    }

    // Error suppression on the insert is kept from the original call so that
    // diagnostics raised while storing one record are not printed into the output.
    @dbconnectInsertData($companyName,$fields['tel'],$fields['mobile'],$fields['fax'],$fields['addr']);
}
echo '抓取数据完毕!';
/*
 * Fetch the content behind $url and return it as a string.
 *
 * file_get_contents() is used when allow_url_fopen is enabled, or when the
 * cURL extension is not available at all. Otherwise the request is made
 * through cURL, so the function still works on hosts where allow_url_fopen
 * is switched off.
 *
 * If fetching fails, check allow_url_fopen in php.ini first, then check
 * whether the cURL extension is enabled.
 *
 * @param string $url Address to download.
 * @return string|false The response body, or false when the request fails.
 */
function vita_get_url_content($url)
{
    // file_get_contents() always exists, so testing for the function can never
    // select the cURL branch; the setting that actually matters is allow_url_fopen.
    $streamsEnabled = filter_var(ini_get('allow_url_fopen'), FILTER_VALIDATE_BOOLEAN);

    if ($streamsEnabled || !function_exists('curl_init')) {
        $opts = array(
            'http' => array(
                'method' => "GET",
                // HTTP header lines are terminated by CRLF.
                'header' => "User-Agent: Mozilla/4.0\r\n"
            )
        );
        $context = stream_context_create($opts);
        return file_get_contents($url, false, $context);
    }

    // Fallback: cURL with a browser-like user agent and extra request headers.
    $useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)";
    $header = array('Accept-Language: zh-cn', 'Connection: Keep-Alive', 'Cache-Control: no-cache');
    $timeout = 5; // connect timeout in seconds

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $file_contents = curl_exec($ch);
    curl_close($ch);

    return $file_contents;
}
/*
 * Connect to the database and insert one company contact row into the
 * CompanyContact table.
 *
 * A new connection is opened and closed on every call. The values are bound
 * through a prepared statement, so quotes or other SQL metacharacters in the
 * scraped text cannot alter the query. mysqli is used because the legacy
 * mysql_* extension was removed in PHP 7.0.
 *
 * NOTE(review): the server address and credentials are hard-coded below;
 * consider moving them to configuration.
 *
 * @param string $companyName Company name.
 * @param string $tel         Telephone number.
 * @param string $mobile      Mobile phone number.
 * @param string $fax         Fax number.
 * @param string $addr        Address.
 * @return bool True when the row was inserted, false on any connection or query failure.
 */
function dbconnectInsertData($companyName,$tel,$mobile,$fax,$addr) {
    $mysql_server_name="10.0.0.78"; // database server host
    $mysql_username="root"; // database user name
    $mysql_password="000000"; // database password
    $mysql_database="CatchInformation"; // database name

    // Missing values arrive as null; store them as empty strings, which is
    // what the previous string-concatenated query produced.
    $companyName = (string)$companyName;
    $tel = (string)$tel;
    $mobile = (string)$mobile;
    $fax = (string)$fax;
    $addr = (string)$addr;

    // Report failures through return values instead of exceptions (the default
    // since PHP 8.1), so a database problem never aborts the caller.
    // This changes the mysqli reporting mode for the rest of the request.
    mysqli_report(MYSQLI_REPORT_OFF);

    $conn = mysqli_connect($mysql_server_name,$mysql_username,$mysql_password,$mysql_database);
    if (!$conn) {
        return false;
    }

    $inserted = false;
    // The caller supplies UTF-8 text; do not write anything if the connection
    // charset cannot be set.
    if (mysqli_set_charset($conn, 'utf8')) {
        $stmt = mysqli_prepare($conn, "insert into CompanyContact(companyName,tel,mobile,fax,addr) values(?,?,?,?,?)");
        if ($stmt) {
            mysqli_stmt_bind_param($stmt, 'sssss', $companyName, $tel, $mobile, $fax, $addr);
            $inserted = mysqli_stmt_execute($stmt);
            mysqli_stmt_close($stmt);
        }
    }

    mysqli_close($conn);
    return $inserted;
}
?>
- 1
- 2
- 3
前往页