<?php
class idn
{
// NP See below
// Internal settings, do not mess with them
// Label prefix that decode() looks for before handing a label to _decode()
private $_punycode_prefix = 'xn--';
// NOTE(review): presumably a marker for an invalid UCS code point; its use is not visible in this part of the file
private $_invalid_ucs = 0x80000000;
// 0x10FFFF is the highest Unicode code point; presumably used as an upper bound - confirm at the call sites
private $_max_ucs = 0x10FFFF;
// The next seven values equal the Punycode parameters listed in RFC 3492, section 5
// (base, tmin, tmax, skew, damp, initial_bias, initial_n)
private $_base = 36;
private $_tmin = 1;
private $_tmax = 26;
private $_skew = 38;
private $_damp = 700;
private $_initial_bias = 72;
private $_initial_n = 0x80;
// The next nine values equal the Hangul syllable (de)composition constants of the
// Unicode Standard (SBase, LBase, VBase, TBase, LCount, VCount, TCount, NCount, SCount)
private $_sbase = 0xAC00;
private $_lbase = 0x1100;
private $_vbase = 0x1161;
private $_tbase = 0x11A7;
private $_lcount = 19;
private $_vcount = 21;
private $_tcount = 28;
private $_ncount = 588; // _vcount * _tcount (21 * 28)
private $_scount = 11172; // _lcount * _tcount * _vcount (19 * 28 * 21)
// NOTE(review): presumably holds the last error; the class also calls a method named _error(),
// which PHP keeps separate from this property
private $_error = false;
// See {@link set_parameter()} for details of how to change the following
// settings from within your script / application
private $_api_encoding = 'utf8'; // 'utf8' (default), 'ucs4_string' or 'ucs4_array'; decode() uses it as its output format
private $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
private $_strict_mode = false; // Behave strict or not
/**
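* Constructor. Applies the given options, if any, via {@link set_parameter()}.
*
* @param array|false $options Option => value pairs; any non-array value is ignored
* @return boolean Result of set_parameter() when options are given, true otherwise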
* @since 0.5.2
*/
public function __construct($options = false)
{
    // Store _sbase plus the product of the three count settings in $this->slast.
    // NOTE(review): slast is not among the properties declared at the top of the
    // class; unless it is declared elsewhere, this assignment creates a dynamic
    // property (deprecated as of PHP 8.2) - confirm.
    $count_product = $this->_lcount * $this->_vcount * $this->_tcount;
    $this->slast = $this->_sbase + $count_product;

    // Anything other than an array means there are no settings to apply.
    if (!is_array($options)) {
        return true;
    }

    // Let set_parameter() validate and store the caller's settings.
    return $this->set_parameter($options);
}
/**
* Sets a new option value. Available options and values:
* [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8,
* 'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8]
* [overlong - Unicode does not allow unnecessarily long encodings of chars,
* to allow this, set this parameter to true, else to false;
* default is false.]
* [strict - true: strict mode, good for registration purposes - causes errors
* on failures; false: loose mode, ideal for "wildlife" applications -
* silently ignores errors and returns the original input instead]
*
* @param mixed Parameter to set (string: single parameter; array of Parameter => Value pairs)
* @param string Value to use (if parameter 1 is a string)
* @return boolean true on success, false otherwise
*/
public function set_parameter($option, $value = false)
{
    // Normalise the "single name plus value" call form into a one-entry map,
    // so both call forms share the loop below.
    $settings = is_array($option) ? $option : array($option => $value);

    foreach ($settings as $name => $setting) {
        // Comparisons are deliberately loose (==), as in a switch statement.
        if ($name == 'encoding') {
            // Only these three encoding names are accepted.
            if (!in_array($setting, array('utf8', 'ucs4_string', 'ucs4_array'))) {
                $this->_error('Set Parameter: Unknown parameter '.$setting.' for option '.$name);
                return false;
            }
            $this->_api_encoding = $setting;
        } elseif ($name == 'overlong') {
            $this->_allow_overlong = (bool) $setting;
        } elseif ($name == 'strict') {
            $this->_strict_mode = (bool) $setting;
        } else {
            // Unknown option name: report it and stop; options handled
            // earlier in this call stay applied.
            $this->_error('Set Parameter: Unknown option '.$name);
            return false;
        }
    }

    return true;
}
/**
* Decode a given ACE domain name
* @param string Domain name (ACE string)
* [@param string Desired output encoding, see {@link set_parameter}]
* @return mixed Decoded domain name (UTF-8 string, or UCS-4 as string or array, depending on the output encoding); false on error
*/
public function decode($input, $one_time_encoding = false)
{
    // Decodes the punycode labels found in $input, which may be a single label,
    // a dotted host name, an e-mail address or a complete URL. Labels that
    // _decode() cannot convert are kept as they are. The result is returned in
    // $one_time_encoding if given, otherwise in the configured API encoding;
    // false is returned on error.

    // A one-time output encoding, when supplied, must be a supported name
    // (loose comparison, like the switch statement it replaces).
    if ($one_time_encoding
        && !in_array($one_time_encoding, array('utf8', 'ucs4_string', 'ucs4_array'))) {
        $this->_error('Unknown encoding '.$one_time_encoding);
        return false;
    }

    // Make sure to drop any newline characters around
    $input = trim($input);

    // Built once: matches a label that starts with the punycode prefix.
    $prefix_pattern = '!^'.preg_quote($this->_punycode_prefix, '!').'!';

    // Negotiate input and try to determine, whether it is a plain string,
    // an email address or something like a complete URL
    if (strpos($input, '@') !== false) { // Maybe it is an email address
        // Compare against false explicitly: strpos() returns 0 for a leading
        // '@', which a plain truthiness test would treat as "not found".
        // No no in strict mode
        if ($this->_strict_mode) {
            $this->_error('Only simple domain name parts can be handled in strict mode');
            return false;
        }
        list($email_pref, $host_part) = explode('@', $input, 2);

        // Decode the host side first, then the local part, so that _decode()
        // is called in the same order as before.
        $sides = array('host' => $host_part, 'local' => $email_pref);
        foreach ($sides as $side => $text) {
            $labels = explode('.', $text);
            foreach ($labels as $k => $v) {
                // Only labels carrying the punycode prefix are decoded here
                if (preg_match($prefix_pattern, $v)) {
                    $conv = $this->_decode($v);
                    if ($conv) $labels[$k] = $conv;
                }
            }
            $sides[$side] = join('.', $labels);
        }
        $return = $sides['local'].'@'.$sides['host'];
    } elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters)
        // No no in strict mode
        if ($this->_strict_mode) {
            $this->_error('Only simple domain name parts can be handled in strict mode');
            return false;
        }
        $parsed = parse_url($input);
        if (isset($parsed['host'])) {
            $labels = explode('.', $parsed['host']);
            foreach ($labels as $k => $v) {
                $conv = $this->_decode($v);
                if ($conv) $labels[$k] = $conv;
            }
            $parsed['host'] = join('.', $labels);

            // Reassemble the URL. Components are tested with isset() and
            // !== '' rather than empty(), because empty() also rejects the
            // string '0' and would silently drop e.g. a query of "0".
            $return = '';
            if (isset($parsed['scheme']) && $parsed['scheme'] !== '') {
                $return .= $parsed['scheme']
                    .(strtolower($parsed['scheme']) == 'mailto' ? ':' : '://');
            }
            if (isset($parsed['user']) && $parsed['user'] !== '') {
                $return .= $parsed['user'];
                if (isset($parsed['pass']) && $parsed['pass'] !== '') {
                    $return .= ':'.$parsed['pass'];
                }
                $return .= '@';
            }
            $return .= $parsed['host'];
            if (!empty($parsed['port'])) {
                $return .= ':'.$parsed['port'];
            }
            $glue = array('path' => '', 'query' => '?', 'fragment' => '#');
            foreach ($glue as $part => $lead) {
                if (isset($parsed[$part]) && $parsed[$part] !== '') {
                    $return .= $lead.$parsed[$part];
                }
            }
        } else { // parse_url seems to have failed, try without it
            $labels = explode('.', $input);
            foreach ($labels as $k => $v) {
                $conv = $this->_decode($v);
                $labels[$k] = ($conv) ? $conv : $v;
            }
            $return = join('.', $labels);
        }
    } else { // Otherwise we consider it being a pure domain name string
        $return = $this->_decode($input);
        if (!$return) $return = $input;
    }

    // The output is UTF-8 by default, other output formats need conversion here
    // If one time encoding is given, use this, else the objects property
    $encoding = ($one_time_encoding) ? $one_time_encoding : $this->_api_encoding;
    switch ($encoding) {
        case 'utf8':
            return $return;
        case 'ucs4_string':
            return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
        case 'ucs4_array':
            return $this->_utf8_to_ucs4($return);
        default:
            $this->_error('Unsupported output format');
            return false;
    }
}