<?php
// {{{ license
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
//
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU Lesser General Public License as |
// | published by the Free Software Foundation; either version 2.1 of the |
// | License, or (at your option) any later version. |
// | |
// | This library is distributed in the hope that it will be useful, but |
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | Lesser General Public License for more details. |
// | |
// | You should have received a copy of the GNU Lesser General Public |
// | License along with this library; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
// | USA. |
// +----------------------------------------------------------------------+
//
// }}}
/**
* Encode/decode Internationalized Domain Names.
*
* This class converts internationalized domain names (see RFC 3490 for details),
* as used by various registries worldwide, between their original (localized)
* form and the encoded form used in the DNS (Domain Name System).
*
* The class provides two public methods, encode() and decode(), which do exactly
* what you would expect them to do. You are allowed to use complete domain names,
* simple strings and complete email addresses as well. That means, that you might
* use any of the following notations:
*
* - www.nörgler.com
* - xn--nrgler-wxa
* - xn--brse-5qa.xn--knrz-1ra.info
*
* Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4
* array. Unicode output is available in the same formats.
* You can select your preferred format via {@link set_parameter()}.
*
* ACE input and output is always expected to be ASCII.
*
* @author Matthias Sommerfeld <mso@phlylabs.de>
* @author Leonid Kogan <lko@neuse.de>
* @copyright 2004-2009 phlyLabs Berlin, http://phlylabs.de
* @version 0.6.3
* @changelog since 0.5.1 class updated to PHP5/6 style should be compatible to PHP 4.3+
* - added a missing replace mapping for THAI CHARACTER SARA AM
*/
class idna_convert
{
// NP See below
// Internal settings, do not mess with them
// ACE prefix that marks an encoded (Punycode) label
private $_punycode_prefix = 'xn--';
// Code point limits; 0x10FFFF is the highest code point defined by Unicode
private $_invalid_ucs = 0x80000000;
private $_max_ucs = 0x10FFFF;
// Punycode parameters; the values match those listed in RFC 3492, section 5
private $_base = 36;
private $_tmin = 1;
private $_tmax = 26;
private $_skew = 38;
private $_damp = 700;
private $_initial_bias = 72;
private $_initial_n = 0x80;
// Base offsets and counts; the values match the Hangul syllable
// (de)composition constants of the Unicode standard
// NOTE(review): their use is outside this part of the file - confirm
private $_sbase = 0xAC00;
private $_lbase = 0x1100;
private $_vbase = 0x1161;
private $_tbase = 0x11A7;
private $_lcount = 19;
private $_vcount = 21;
private $_tcount = 28;
private $_ncount = 588; // _vcount * _tcount
private $_scount = 11172; // _lcount * _tcount * _vcount
// Assigned in the constructor as _sbase + _lcount * _vcount * _tcount.
// Declared explicitly because creating a property on the fly is deprecated
// as of PHP 8.2; kept public because a dynamically created property was
// publicly visible, so any existing outside access keeps working.
public $slast;
// NOTE(review): presumably holds the most recent error set through the
// _error() method; false while no error is recorded - confirm
private $_error = false;
// See {@link set_parameter()} for details of how to change the following
// settings from within your script / application
private $_api_encoding = 'utf8'; // Default input charset is UTF-8
private $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
private $_strict_mode = false; // Behave strict or not
/**
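* The constructor; optionally applies an initial set of options.
*
* @param array|false $options Option => value pairs handed to {@link set_parameter()}; false (default) for none
* @return boolean Result of set_parameter() if options were given, true otherwise (discarded when invoked via "new")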
* @since 0.5.2
*/
public function __construct($options = false)
{
    // slast = base offset plus the product of the three count constants
    $combinations = $this->_lcount * $this->_vcount * $this->_tcount;
    $this->slast = $this->_sbase + $combinations;

    // Nothing to configure unless an option array was supplied
    if (!is_array($options)) {
        return true;
    }

    // Hand the options over and pass the boolean outcome back unchanged
    return $this->set_parameter($options);
}
/**
* Sets a new option value. Available options and values:
* [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8,
* 'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8]
* [overlong - Unicode does not allow unnecessarily long encodings of chars,
* to allow this, set this parameter to true, else to false;
* default is false.]
* [strict - true: strict mode, good for registration purposes - Causes errors
* on failures; false: loose mode, ideal for "wildlife" applications
* by silently ignoring errors and returning the original input instead]
*
* @param mixed $option Parameter to set (string: single parameter; array of Parameter => Value pairs)
* @param mixed $value Value to use (if parameter 1 is a string)
* @return boolean true on success, false otherwise
*/
public function set_parameter($option, $value = false)
{
    // Normalize the "single name plus value" call form into the array form
    if (!is_array($option)) {
        $option = array($option => $value);
    }
    foreach ($option as $k => $v) {
        // All comparisons are strict (===). A switch compares loosely, so a
        // boolean true value matched the first case label and was stored as
        // the encoding; before PHP 8 the integer 0 (as key or value) matched
        // any non-numeric case string as well.
        if ($k === 'encoding') {
            // Only the three known encoding names are accepted
            if (!in_array($v, array('utf8', 'ucs4_string', 'ucs4_array'), true)) {
                $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
                return false;
            }
            $this->_api_encoding = $v;
        } elseif ($k === 'overlong') {
            // Any truthy value enables the flag
            $this->_allow_overlong = ($v) ? true : false;
        } elseif ($k === 'strict') {
            // Any truthy value enables the flag
            $this->_strict_mode = ($v) ? true : false;
        } else {
            // Stop at the first unknown option; options processed before
            // this one stay applied
            $this->_error('Set Parameter: Unknown option '.$k);
            return false;
        }
    }
    return true;
}
/**
* Decode a given ACE domain name
* @param string Domain name (ACE string)
* [@param string Desired output encoding, see {@link set_parameter}]
* @return string Decoded Domain name (UTF-8 or UCS-4)
*/
public function decode($input, $one_time_encoding = false)
{
// Optionally set
if ($one_time_encoding) {
switch ($one_time_encoding) {
case 'utf8':
case 'ucs4_string':
case 'ucs4_array':
break;
default:
$this->_error('Unknown encoding '.$one_time_encoding);
return false;
}
}
// Make sure to drop any newline characters around
$input = trim($input);
// Negotiate input and try to determine, whether it is a plain string,
// an email address or something like a complete URL
if (strpos($input, '@')) { // Maybe it is an email address
// No no in strict mode
if ($this->_strict_mode) {
$this->_error('Only simple domain name parts can be handled in strict mode');
return false;
}
list ($email_pref, $input) = explode('@', $input, 2);
$arr = explode('.', $input);
foreach ($arr as $k => $v) {
if (preg_match('!^'.preg_quote($this->_punycode_prefix, '!').'!', $v)) {
$conv = $this->_decode($v);
if ($conv) $arr[$k] = $conv;
}
}
$input = join('.', $arr);
$arr = explode('.', $email_pref);
- 1
- 2
- 3
前往页