<?php
/*************************************************
Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2008 New Digital Group, all rights reserved
Version: 1.2.4
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
You may contact the author of Snoopy by e-mail at:
monte@ohrt.com
The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/
*************************************************/
class Snoopy
{
/**** Public variables ****/
/* user-definable settings, followed by the result fields that a fetch fills in */
var $host = "www.php.net"; // host name we are connecting to; fetch() overwrites it with the host parsed from the URI
var $port = 80; // port we are connecting to; fetch() overwrites it only when the URI carries an explicit port
var $proxy_host = ""; // proxy host to use
var $proxy_port = ""; // proxy port to use
var $proxy_user = ""; // proxy user to use
var $proxy_pass = ""; // proxy password to use
var $agent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)"; // user agent we masquerade as
var $referer = ""; // referer info to pass
var $cookies = array(); // array of cookies to pass, keyed by cookie name
// e.g. $cookies["username"]="joe";
var $rawheaders = array(); // array of raw headers to send, keyed by header name
// e.g. $rawheaders["Content-type"]="text/html";
var $maxredirs = 5; // maximum HTTP redirection depth. 0 = do not follow redirects
var $lastredirectaddr = ""; // address of the most recent redirect that fetch() followed
var $offsiteok = true; // allow redirects to a host other than the current one
var $maxframes = 0; // maximum frame content depth. 0 = do not fetch frames
var $expandlinks = true; // expand links to fully qualified URLs.
// this only applies to fetchlinks(),
// submitlinks(), and submittext()
var $passcookies = true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.
var $user = ""; // user for http authentication; fetch() overwrites it when the URI embeds a user
var $pass = ""; // password for http authentication; fetch() overwrites it when the URI embeds a password
// http accept types
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
var $results = ""; // where the fetched content is put
var $error = ""; // error messages are stored here
var $response_code = ""; // response code returned from server
var $headers = array(); // headers returned from server are stored here
var $maxlength = 500000; // max return data length (body)
var $read_timeout = 0; // timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts
var $timed_out = false; // set if a read operation timed out
var $status = 0; // http request status
var $temp_dir = "/tmp"; // temporary directory that the webserver
// has permission to write to.
// under Windows, this should be C:\temp
var $curl_path = "/usr/local/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
// the cURL binary is supplied here.
// set to false if you do not have
// cURL installed. See http://curl.haxx.se
// for details on installing cURL.
// fetch() returns false for https URIs when
// this is empty/false or (where is_executable()
// exists) the path is not executable.
// Snoopy does *not* use the cURL
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.
/**** Private variables ****/
/* internal state; the leading underscore marks these as not part of the public interface */
var $_maxlinelen = 4096; // max line length (headers)
var $_httpmethod = "GET"; // default http request method; fetch() passes it to the request helpers
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST"; // default submit method
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false; // will be set if page fetched is a redirect; fetch() follows it subject to $maxredirs and $offsiteok
var $_redirectdepth = 0; // incremented by fetch() each time it follows a redirect
var $_frameurls = array(); // frame src urls; fetch() fetches these while $_framedepth < $maxframes
var $_framedepth = 0; // incremented by fetch() after each frame url it fetches
var $_isproxy = false; // set if using a proxy server; fetch() then sends the entire URI instead of only the path
var $_fp_timeout = 30; // timeout for socket connection (presumably seconds -- TODO confirm against _connect())
// NOTE(review): the two properties below are undocumented and are not referenced
// in the visible part of this file; confirm their purpose before relying on them.
var $base_url = '';
var $decode = '';
/*======================================================================*\
Function: fetch
Purpose: fetch the contents of a web page
(and possibly other protocols in the
future like ftp, nntp, gopher, etc.)
Input: $URI the location of the page to fetch
Output: $this->results the output text from the fetch
\*======================================================================*/
function fetch($URI)
{
//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
$URI_PARTS = parse_url($URI);
if (!empty($URI_PARTS["user"]))
$this->user = $URI_PARTS["user"];
if (!empty($URI_PARTS["pass"]))
$this->pass = $URI_PARTS["pass"];
if (empty($URI_PARTS["query"]))
$URI_PARTS["query"] = '';
if (empty($URI_PARTS["path"]))
$URI_PARTS["path"] = '';
switch(strtolower($URI_PARTS["scheme"]))
{
case "http":
$this->host = $URI_PARTS["host"];
if(!empty($URI_PARTS["port"]))
$this->port = $URI_PARTS["port"];
if($this->_connect($fp))
{
if($this->_isproxy)
{
// using proxy, send entire URI
$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
}
else
{
$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
// no proxy, send only the path
$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
}
$this->_disconnect($fp);
if($this->_redirectaddr)
{
/* url was redirected, check if we've hit the max depth */
if($this->maxredirs > $this->_redirectdepth)
{
// only follow redirect if it's on this site, or offsiteok is true
if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
{
/* follow the redirect */
$this->_redirectdepth++;
$this->lastredirectaddr=$this->_redirectaddr;
$this->fetch($this->_redirectaddr);
}
}
}
if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
{
$frameurls = $this->_frameurls;
$this->_frameurls = array();
while(list(,$frameurl) = each($frameurls))
{
if($this->_framedepth < $this->maxframes)
{
$this->fetch($frameurl);
$this->_framedepth++;
}
else
break;
}
}
}
else
{
return false;
}
return true;
break;
case "https":
if(!$this->curl_path)
return false;
if(function_exists("is_executable"))
if (!is_executable($this->curl_path))
return false;
$this->host = $URI_PARTS["host"];
if(!empty($URI_PARTS["port"]))
$this->port = $URI_PARTS["port"];
if($this->_isproxy)
{
// using proxy, send entire URI
$this->_httpsrequest($URI,$URI,$this->_httpmethod);
}
else
{
$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PAR
- 1
- 2
- 3
- 4
- 5
- 6
前往页