/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* IBk.java
* Copyright (C) 1999 Stuart Inglis,Len Trigg,Eibe Frank
*
*/
package weka.classifiers.lazy;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.UpdateableClassifier;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
* <i>K</i>-nearest neighbours classifier. For more information, see <p>
*
* Aha, D., and D. Kibler (1991) "Instance-based learning algorithms",
* <i>Machine Learning</i>, vol.6, pp. 37-66.<p>
*
* Valid options are:<p>
*
* -K num <br>
* Set the number of nearest neighbors to use in prediction
* (default 1) <p>
*
* -W num <br>
* Set a fixed window size for incremental train/testing. As
* new training instances are added, oldest instances are removed
* to maintain the number of training instances at this size.
* (default no window) <p>
*
* -I <br>
* Neighbors will be weighted by the inverse of their distance
* when voting. (default equal weighting) <p>
*
* -F <br>
* Neighbors will be weighted by their similarity when voting.
* (default equal weighting) <p>
*
* -X <br>
* Select the number of neighbors to use by hold-one-out cross
* validation, with an upper limit given by the -K option. <p>
*
* -E <br>
* When k is selected by cross-validation for numeric class attributes,
* minimize mean-squared error. (default mean absolute error) <p>
*
* -N <br>
* Turns off normalization. <p>
*
* @author Stuart Inglis (singlis@cs.waikato.ac.nz)
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 1.32 $
*/
public class IBk extends Classifier implements
OptionHandler, UpdateableClassifier, WeightedInstancesHandler {
/**
* Returns a string describing classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "K-nearest neighbours classifier. Normalizes attributes by default. Can "
+ "select appropriate value of K based on cross-validation. Can also do "
+ "distance weighting. For more information, see\n\n"
+ "Aha, D., and D. Kibler (1991) \"Instance-based learning algorithms\", "
+ "Machine Learning, vol.6, pp. 37-66.";
}
/*
* A class for storing data about a neighboring instance
*/
protected class NeighborNode {
/** The neighbor instance */
protected Instance m_Instance;
/** The distance from the current instance to this neighbor */
protected double m_Distance;
/** A link to the next neighbor instance */
protected NeighborNode m_Next;
/**
* Create a new neighbor node.
*
* @param distance the distance to the neighbor
* @param instance the neighbor instance
* @param next the next neighbor node
*/
public NeighborNode(double distance, Instance instance, NeighborNode next){
m_Distance = distance;
m_Instance = instance;
m_Next = next;
}
/**
* Create a new neighbor node that doesn't link to any other nodes.
*
* @param distance the distance to the neighbor
* @param instance the neighbor instance
*/
public NeighborNode(double distance, Instance instance) {
this(distance, instance, null);
}
}
/*
* A class for a linked list to store the nearest k neighbours
* to an instance. We use a list so that we can take care of
* cases where multiple neighbours are the same distance away.
* i.e. the minimum length of the list is k.
*/
protected class NeighborList {
/** The first node in the list */
protected NeighborNode m_First;
/** The last node in the list */
protected NeighborNode m_Last;
/** The number of nodes to attempt to maintain in the list */
protected int m_Length = 1;
/**
* Creates the neighborlist with a desired length
*
* @param length the length of list to attempt to maintain
*/
public NeighborList(int length) {
m_Length = length;
}
/**
* Gets whether the list is empty.
*
* @return true if so
*/
public boolean isEmpty() {
return (m_First == null);
}
/**
* Gets the current length of the list.
*
* @return the current length of the list
*/
public int currentLength() {
int i = 0;
NeighborNode current = m_First;
while (current != null) {
i++;
current = current.m_Next;
}
return i;
}
/**
* Inserts an instance neighbor into the list, maintaining the list
* sorted by distance.
*
* @param distance the distance to the instance
* @param instance the neighboring instance
*/
public void insertSorted(double distance, Instance instance) {
if (isEmpty()) {
m_First = m_Last = new NeighborNode(distance, instance);
} else {
NeighborNode current = m_First;
if (distance < m_First.m_Distance) {// Insert at head
m_First = new NeighborNode(distance, instance, m_First);
} else { // Insert further down the list
for( ;(current.m_Next != null) &&
(current.m_Next.m_Distance < distance);
current = current.m_Next);
current.m_Next = new NeighborNode(distance, instance,
current.m_Next);
if (current.equals(m_Last)) {
m_Last = current.m_Next;
}
}
// Trip down the list until we've got k list elements (or more if the
// distance to the last elements is the same).
int valcount = 0;
for(current = m_First; current.m_Next != null;
current = current.m_Next) {
valcount++;
if ((valcount >= m_Length) && (current.m_Distance !=
current.m_Next.m_Distance)) {
m_Last = current;
current.m_Next = null;
break;
}
}
}
}
/**
* Prunes the list to contain the k nearest neighbors. If there are
* multiple neighbors at the k'th distance, all will be kept.
*
* @param k the number of neighbors to keep in the list.
*/
public void pruneToK(int k) {
if (isEmpty()) {
return;
}
if (k < 1) {
k = 1;
}
int currentK = 0;
double currentDist = m_First.m_Distance;
NeighborNode current = m_First;
for(; current.m_Next != null; current = current.m_Next) {
currentK++;
currentDist = current.m_Distance;
if ((currentK >= k) && (currentDist != current.m_Next.m_Distance)) {
m_Last = current;
current.m_Next = null;
break;
}
}
}
/**
* Prints out the contents of the neighborlist
*/
public void printList() {
if (isEmpty()) {
System.out.println("Empty list");
} else {
NeighborNode current = m_First;
while (current != null) {
System.out.println("Node: instance " + current.m_Instance
+ ", distance " + current.m_Distance);
current = current.m_Next;
}
System.out.println();
}
}
}
/** The training instances used for classification. */
protected Instances m_Train;
/** The number of class values (or 1 if predicting numeric) */
protected int m_NumClasses;
/** The class attribute type */
protected int m_ClassType;
/** The minimum values for numeric attributes. */
protected double [] m_Min;
/** The maximum values for numeric attributes. */
protected dou