/*
* Copyright 2001-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org1.apache.commons.codec.language;
import org1.apache.commons.codec.EncoderException;
import org1.apache.commons.codec.StringEncoder;
/**
* Encodes a string into a double metaphone value.
* This implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>.
* <ul>
* <li>Original Article: <a
* href="http://www.cuj.com/documents/s=8038/cuj0006philips/">
* http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li>
* <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">
* ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li>
* </ul>
*
* @author Apache Software Foundation
* @version $Id: DoubleMetaphone.java,v 1.24 2004/06/05 18:32:04 ggregory Exp $
*/
public class DoubleMetaphone implements StringEncoder {
/**
* "Vowels" to test for. Holds the six letters A, E, I, O, U and Y, the same
* letters that the main encoding loop routes to the vowel handler.
*/
private static final String VOWELS = "AEIOUY";
/**
* Prefixes which, when present, are not pronounced.
* NOTE(review): presumably consulted by isSilentStart(), whose body is not
* visible in this section - confirm.
*/
private static final String[] SILENT_START =
{ "GN", "KN", "PN", "WR", "PS" };
/**
* The single-character strings L, R, N, M, B, H, F, V, W and a single space.
* NOTE(review): the code that matches against this list is not visible in
* this section - confirm its role against the handler that uses it.
*/
private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
{ "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
/**
* The two-letter sequences ES, EP, EB, EL, EY, IB, IL, IN, IE, EI and ER.
* NOTE(review): the code that matches against this list is not visible in
* this section - confirm its role against the handler that uses it.
*/
private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
{ "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
/**
* The single-character strings L, T, K, S, N, M, B and Z.
* NOTE(review): the code that matches against this list is not visible in
* this section - confirm its role against the handler that uses it.
*/
private static final String[] L_T_K_S_N_M_B_Z =
{ "L", "T", "K", "S", "N", "M", "B", "Z" };
/**
* Maximum length of an encoding; initialised to 4.
* NOTE(review): presumably the value reported by getMaxCodeLen(), which
* doubleMetaphone(String, boolean) hands to the result holder it creates -
* confirm against the accessor.
*/
protected int maxCodeLen = 4;
/**
* Creates an instance of this DoubleMetaphone encoder
*/
public DoubleMetaphone() {
super();
}
/**
 * Encodes a value with Double Metaphone, returning the primary (that is,
 * non-alternate) encoding.
 *
 * @param value String to encode
 * @return the result of {@link #doubleMetaphone(String, boolean)} called
 *         with <code>alternate</code> set to <code>false</code>
 */
public String doubleMetaphone(String value) {
    final boolean useAlternate = false;
    return doubleMetaphone(value, useAlternate);
}
/**
 * Produces the Double Metaphone code for a string, returning either the
 * primary or the alternate form of the encoding.
 *
 * @param value String to encode
 * @param alternate <code>true</code> to return the alternate encoding,
 *                  <code>false</code> to return the primary one
 * @return the selected encoding, or <code>null</code> if
 *         <code>cleanInput</code> yields <code>null</code> for the value
 */
public String doubleMetaphone(String value, boolean alternate) {
    final String word = cleanInput(value);
    if (word == null) {
        return null;
    }

    final boolean slavoGermanic = isSlavoGermanic(word);

    // Begin at the second character when isSilentStart reports true.
    int pos = 0;
    if (isSilentStart(word)) {
        pos = 1;
    }

    final DoubleMetaphoneResult code = new DoubleMetaphoneResult(this.getMaxCodeLen());
    final int length = word.length();

    // Walk the word until the result reports it is complete or the input is used up.
    while (!code.isComplete() && pos < length) {
        final char current = word.charAt(pos);
        switch (current) {
        // Vowels share a single handler.
        case 'A':
        case 'E':
        case 'I':
        case 'O':
        case 'U':
        case 'Y':
            pos = handleAEIOUY(word, code, pos);
            break;

        // Letters with a fixed code; an immediately repeated letter is consumed with the first.
        case 'B':
            code.append('P');
            pos += (charAt(word, pos + 1) == 'B') ? 2 : 1;
            break;
        case 'F':
            code.append('F');
            pos += (charAt(word, pos + 1) == 'F') ? 2 : 1;
            break;
        case 'K':
            code.append('K');
            pos += (charAt(word, pos + 1) == 'K') ? 2 : 1;
            break;
        case 'N':
            code.append('N');
            pos += (charAt(word, pos + 1) == 'N') ? 2 : 1;
            break;
        case 'Q':
            code.append('K');
            pos += (charAt(word, pos + 1) == 'Q') ? 2 : 1;
            break;
        case 'V':
            code.append('F');
            pos += (charAt(word, pos + 1) == 'V') ? 2 : 1;
            break;

        // 'M' advances two positions when conditionM0 holds, otherwise one.
        case 'M':
            code.append('M');
            pos += conditionM0(word, pos) ? 2 : 1;
            break;

        // Accented letters: always a single-position advance.
        case '\u00C7':
            // A C with a Cedilla
            code.append('S');
            pos++;
            break;
        case '\u00D1':
            // N with a tilde (spanish ene)
            code.append('N');
            pos++;
            break;

        // Letters delegated to dedicated handlers, each returning the next position.
        case 'C':
            pos = handleC(word, code, pos);
            break;
        case 'D':
            pos = handleD(word, code, pos);
            break;
        case 'G':
            pos = handleG(word, code, pos, slavoGermanic);
            break;
        case 'H':
            pos = handleH(word, code, pos);
            break;
        case 'J':
            pos = handleJ(word, code, pos, slavoGermanic);
            break;
        case 'L':
            pos = handleL(word, code, pos);
            break;
        case 'P':
            pos = handleP(word, code, pos);
            break;
        case 'R':
            pos = handleR(word, code, pos, slavoGermanic);
            break;
        case 'S':
            pos = handleS(word, code, pos, slavoGermanic);
            break;
        case 'T':
            pos = handleT(word, code, pos);
            break;
        case 'W':
            pos = handleW(word, code, pos);
            break;
        case 'X':
            pos = handleX(word, code, pos);
            break;
        case 'Z':
            pos = handleZ(word, code, pos, slavoGermanic);
            break;

        // Any other character adds nothing to the code and is skipped.
        default:
            pos++;
            break;
        }
    }

    if (alternate) {
        return code.getAlternate();
    }
    return code.getPrimary();
}
/**
 * Encodes an object with DoubleMetaphone. Only <code>String</code>
 * arguments are accepted (like <code>Metaphone</code>); anything else,
 * including <code>null</code>, is rejected.
 *
 * @param obj Object to encode (should be of type String)
 * @return An encoded Object (will be of type String)
 * @throws EncoderException encode parameter is not of type String
 */
public Object encode(Object obj) throws EncoderException {
    if (obj instanceof String) {
        final String text = (String) obj;
        return doubleMetaphone(text);
    }
    // null also lands here, because instanceof is false for null.
    throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
}
/**
 * Encodes a string with DoubleMetaphone by delegating to
 * {@link #doubleMetaphone(String)}.
 *
 * @param value String to encode
 * @return An encoded String
 */
public String encode(String value) {
    final String encoded = doubleMetaphone(value);
    return encoded;
}
/**
* Check if the Double Metaphone values of two <code>String</code> values
* are equal.
*
* @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
* @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
* @return <code>true</code> if the encoded <code>String</code>s are equal;
* <code>false</code> otherwise.
* @see #isDoubleMetaphoneEqual(String,String,boolean)