/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.feilong.lib.validator;
import java.io.Serializable;
import java.net.IDN;
import java.util.Arrays;
import java.util.Locale;
/**
* <p>
* <b>Domain name</b> validation routines.
* </p>
*
* <p>
* This validator provides methods for validating Internet domain names
* and top-level domains.
* </p>
*
* <p>
* Domain names are evaluated according
* to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
* section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
* section 2.1. No accommodation is provided for the specialized needs of
* other applications; if the domain name has been URL-encoded, for example,
* validation will fail even though the equivalent plaintext version of the
* same name would have passed.
* </p>
*
* <p>
* Validation is also provided for top-level domains (TLDs) as defined and
* maintained by the Internet Assigned Numbers Authority (IANA):
* </p>
*
* <ul>
* <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
* (<code>.arpa</code>, etc.)</li>
* <li>{@link #isValidGenericTld} - validates generic TLDs
* (<code>.com, .org</code>, etc.)</li>
* <li>{@link #isValidCountryCodeTld} - validates country code TLDs
* (<code>.us, .uk, .cn</code>, etc.)</li>
* </ul>
*
* <p>
* (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
* methods to ensure that a given domain name matches a specific IP; see
* {@link java.net.InetAddress} for that functionality.)
* </p>
*
* @version $Revision: 1781829 $
* @since Validator 1.4
*/
public class DomainValidator implements Serializable{
/**
 * Upper bound on the length of a domain name. {@code isValid} and
 * {@code isValidDomainSyntax} apply it to the value returned by
 * {@code unicodeToASCII}, not to the raw input.
 */
private static final int MAX_DOMAIN_LENGTH = 253;
/** Reusable zero-length {@code String} array. */
private static final String[] EMPTY_STRING_ARRAY = new String[0];
/** Serialization version identifier for this {@link Serializable} class. */
private static final long serialVersionUID = -4407125112880174009L;
// Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
// RFC2396: domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
// Max 63 characters: one alphanumeric, optionally followed by up to 61
// alphanumerics or hyphens plus a closing alphanumeric, so a label can neither
// start nor end with a hyphen. (?>...) is an independent (atomic) group.
private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]{0,61}\\p{Alnum})?";
// RFC2396 toplabel = alpha | alpha *( alphanum | "-" ) alphanum
// Max 63 characters; same shape as the domain label except that the first
// character must be alphabetic.
private static final String TOP_LABEL_REGEX = "\\p{Alpha}(?>[\\p{Alnum}-]{0,61}\\p{Alnum})?";
// RFC2396 hostname = *( domainlabel "." ) toplabel [ "." ]
// Note that the regex currently requires both a domain label and a top level label, whereas
// the RFC does not. This is because the regex is used to detect if a TLD is present.
// If the match fails, input is checked against DOMAIN_LABEL_REGEX (hostnameRegex)
// RFC1123 sec 2.1 allows hostnames to start with a digit
// The top label is the only capturing group; the leading labels use a
// non-capturing group and a single trailing dot is tolerated.
private static final String DOMAIN_NAME_REGEX = "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX
+ ")\\.?$";
/**
 * Whether this instance accepts local names: consulted by {@code isValid} before
 * falling back to the single-label hostname check, and by {@code isValidTld}
 * before the local TLD check.
 */
private final boolean allowLocal;
/**
 * Singleton instance of this validator, which
 * doesn't consider local addresses as valid.
 */
private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
/**
 * Singleton instance of this validator, which does
 * consider local addresses valid.
 */
private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
/**
 * RegexValidator for matching domains (built from {@code DOMAIN_NAME_REGEX}).
 * {@code isValid} passes the first element returned by {@code match} to
 * {@code isValidTld}.
 * NOTE(review): presumably that element is the captured top label - confirm
 * against RegexValidator.
 */
private final RegexValidator domainRegex = new RegexValidator(DOMAIN_NAME_REGEX);
/**
 * RegexValidator for matching a local hostname, i.e. a single label
 * (built from {@code DOMAIN_LABEL_REGEX}).
 */
// RFC1123 sec 2.1 allows hostnames to start with a digit
private final RegexValidator hostnameRegex = new RegexValidator(DOMAIN_LABEL_REGEX);
/**
 * Returns the singleton instance of this validator. It
 * will not consider local addresses as valid.
 *
 * <p>
 * The method is {@code synchronized} on the class and sets the static
 * {@code inUse} flag to {@code true} on every call.
 * </p>
 *
 * @return the singleton instance of this validator
 */
public static synchronized DomainValidator getInstance(){
// NOTE(review): inUse is declared outside this excerpt; presumably it records
// that an instance has been handed out - confirm against its declaration.
inUse = true;
return DOMAIN_VALIDATOR;
}
/**
 * Hands out one of the two shared validator instances, chosen by whether
 * local addresses should be accepted.
 *
 * <p>
 * The method is {@code synchronized} on the class and sets the static
 * {@code inUse} flag to {@code true} on every call.
 * </p>
 *
 * @param allowLocal
 *            {@code true} to obtain the instance that treats local addresses
 *            as valid, {@code false} for the instance that rejects them
 * @return the singleton instance matching {@code allowLocal}
 */
public static synchronized DomainValidator getInstance(boolean allowLocal){
    inUse = true;
    return allowLocal ? DOMAIN_VALIDATOR_WITH_LOCAL : DOMAIN_VALIDATOR;
}
/**
 * Private constructor; the class itself creates its two shared instances, which
 * callers obtain through {@link #getInstance()} or {@link #getInstance(boolean)}.
 *
 * @param allowLocal
 *            whether this instance should consider local addresses valid
 */
private DomainValidator(boolean allowLocal){
this.allowLocal = allowLocal;
}
/**
 * Checks whether the given <code>String</code> is a syntactically valid
 * domain name ending in a recognized top-level domain.
 * The parsing is case-insensitive.
 *
 * <p>
 * When the input has no separate top-level label, it is accepted only if this
 * instance allows local addresses and the input is a valid single hostname label.
 * </p>
 *
 * @param domain
 *            the candidate domain name; {@code null} is rejected
 * @return true if the parameter is a valid domain name
 */
public boolean isValid(String domain){
    if (domain == null){
        return false;
    }
    // The length limit applies to the converted form, not the raw input.
    // Input that could not be converted is left for the regex checks below to reject.
    final String asciiDomain = unicodeToASCII(domain);
    if (asciiDomain.length() > MAX_DOMAIN_LENGTH){
        return false;
    }
    final String[] matched = domainRegex.match(asciiDomain);
    final boolean hasTopLabel = matched != null && matched.length > 0;
    if (hasTopLabel){
        // A full domain name: validity now hinges on the top-level label alone.
        return isValidTld(matched[0]);
    }
    if (!allowLocal){
        return false;
    }
    // Local names are permitted: accept a bare hostname label.
    return hostnameRegex.isValid(asciiDomain);
}
// package protected for unit test access
// must agree with isValid() above
/**
 * Syntax-only counterpart of {@code isValid}: applies the same null, conversion
 * and length checks, but neither validates the top-level label nor consults
 * {@code allowLocal}.
 *
 * @param domain
 *            the candidate domain name; {@code null} is rejected
 * @return true if the input matches the domain name pattern or, failing that,
 *         the single hostname label pattern
 */
final boolean isValidDomainSyntax(String domain){
    if (domain == null){
        return false;
    }
    // The length limit applies to the converted form, not the raw input.
    // Input that could not be converted is left for the regex checks below to reject.
    final String asciiDomain = unicodeToASCII(domain);
    if (asciiDomain.length() > MAX_DOMAIN_LENGTH){
        return false;
    }
    final String[] matched = domainRegex.match(asciiDomain);
    if (matched != null && matched.length > 0){
        return true;
    }
    // Not a multi-label name; fall back to the single-label pattern.
    return hostnameRegex.isValid(asciiDomain);
}
/**
 * Tells whether the specified <code>String</code> matches any
 * IANA-defined top-level domain. Leading dots are ignored if present.
 * The search is case-insensitive.
 *
 * <p>
 * If this instance allows local addresses, local TLDs are accepted as well and
 * are checked first. The remaining categories are tried in the order
 * infrastructure, generic, country code; the first match wins.
 * </p>
 *
 * @param tld
 *            the parameter to check for TLD status, not null
 * @return true if the parameter is a TLD
 */
public boolean isValidTld(String tld){
    final String asciiTld = unicodeToASCII(tld);
    if (allowLocal && isValidLocalTld(asciiTld)){
        return true;
    }
    if (isValidInfrastructureTld(asciiTld)){
        return true;
    }
    if (isValidGenericTld(asciiTld)){
        return true;
    }
    return isValidCountryCodeTld(asciiTld);
}
/**