# -*- coding:utf-8 -*-
#author TCY
#email tangchenyi@mail.hfut.edu.cn
#date 2015/04/03
import os
import math
import sys
class FeatureSelector:
#supported algorithms
TYPE_CHI="CHI"
TYPE_MI="MI"
TYPE_IG="IG"
Type=(TYPE_CHI, TYPE_MI, TYPE_IG)
def __init__(self, selectorType, srcDataPath, dstDicPath, numOfKind):
if(selectorType not in self.Type):
raise ValueError,"selectorType error"
self.__selectorType = selectorType
self.__srcDataPath = srcDataPath
self.__dstDicPath = dstDicPath
self.__numOfKind = numOfKind
pass
def select(self):
if(self.__selectorType == self.TYPE_CHI):
self.run_CHI()
elif(self.__selectorType == self.TYPE_MI):
self.run_MI()
elif(self.__selectorType == self.TYPE_IG):
self.run_IG()
pass
def run_CHI(self):
self.count()
self.calculate_CHI()
return self.__terms_value
pass
def run_MI(self):
self.count()
self.calculate_MI()
return self.__terms_value
pass
def run_IG(self):
self.count()
self.calculate_IG()
return self.__terms_value
pass
def count(self):
fp = open(self.__srcDataPath, "r")
terms_kind=dict()
numOfLine = 0;
numOfEachKind = [None]*self.__numOfKind
for i in range(self.__numOfKind):
numOfEachKind[i] = 0
for line in fp:
terms = line.split()
label=int(terms[0])
unique_terms=set()
for term in terms[1:]:
unique_terms.add(term)
for term in unique_terms:
if term not in terms_kind:
kind=[None]*self.__numOfKind
for i in range(self.__numOfKind):
kind[i] = 0
kind[label]=1
terms_kind[term]=kind
else:
kind=terms_kind[term]
kind[label]=kind[label]+1
numOfLine+=1
numOfEachKind[label]+=1
self.__numOfLine = numOfLine
self.__terms_kind=terms_kind
self.__numOfEachKind = numOfEachKind
pass
def calculate_CHI(self):
N = self.__numOfLine
terms_CHI = dict()
for term in self.__terms_kind.items():
kind = term[1]
numOfThisTerm = 0
CHIs = []
for i in range(self.__numOfKind):
numOfThisTerm += kind[i]
for i in range(self.__numOfKind):
A = kind[i]
B = numOfThisTerm - A
C = self.__numOfEachKind[i] - A
#D=N-A-B-C
D = N - numOfThisTerm - C
#CHI=N*(A*D-B*C)^2/((A+C)*(A+B)*(B+D)*(C+D))
CHI = N * math.pow((A*D-B*C),2)/((A+C)*(A+B)*(B+D)*(C+D))
CHIs.append(CHI)
CHIs.sort()
terms_CHI[term[0]]=CHIs[self.__numOfKind-1]
self.__terms_value = terms_CHI
def calculate_IG(self):
N = self.__numOfLine+0.0
terms_IG = dict()
C = [None]*self.__numOfKind
PC = [None]*self.__numOfKind
tmp_PC = 0;
for i in range(self.__numOfKind):
C[i] = self.__numOfEachKind[i]
PC[i] = C[i] / N
tmp_PC += PC[i] * math.log(PC[i], 2)
for term in self.__terms_kind.items():
kind = term[1]
numOfThisTerm = 0
for i in range(self.__numOfKind):
numOfThisTerm += kind[i]
A = numOfThisTerm
PA = A / N
PA_ = 1 - PA
tmp_PB = 0
tmp_PB_ = 0
for i in range(self.__numOfKind):
B = kind[i]
B_ = C[i] - B
PB = B / A
PB_ = B_ / (N - A)
if PB != 0:
tmp_PB += PB*math.log(PB,2)
if PB_ != 0:
tmp_PB_ += PB_*math.log(PB_,2)
IG = PA * tmp_PB + PA_ * tmp_PB_ - tmp_PC
terms_IG[term[0]]=IG
self.__terms_value = terms_IG
pass
def calculate_MI(self):
N = self.__numOfLine
terms_MI = dict()
for term in self.__terms_kind.items():
kind = term[1]
numOfThisTerm = 0
MIs = []
for i in range(self.__numOfKind):
numOfThisTerm += kind[i]
for i in range(self.__numOfKind):
A = kind[i]
B = numOfThisTerm - A
C = self.__numOfEachKind[i] - A
#MI=lg(A*N/((A+C)(A+B))
if A != 0:
MI = math.log(N*A/((A+C)*(A+B)))
else :
MI = 0
MIs.append(MI)
MIs.sort()
terms_MI[term[0]]=MIs[self.__numOfKind-1]
self.__terms_value = terms_MI
pass
def printMessage(self):
fp = open(self.__dstDicPath, "w")
for item in self.__terms_value.items():
fp.write("%s\t%f\n"%item)
fp.close()
if __name__ == "__main__":
selector= FeatureSelector("IG", "test.txt", "dic.txt", 3)
selector.select()
selector.printMessage()
采用基于遗传算法(GA)的二维主成分分析法(2DPCA)进行人脸识别。
版权申诉
136 浏览量
2023-12-27
15:13:35
上传
评论
收藏 2KB ZIP 举报
m0_74456535
- 粉丝: 89
- 资源: 725
最新资源
- 基于Javascript和Python的微商城项目设计源码 - MicroMall
- 基于Java的网上订餐系统设计源码 - online ordering system
- 基于Javascript的超级美眉网络资源管理应用模块设计源码
- 基于Typescript和PHP的编程知识储备库设计源码 - study-php
- Screenshot_2024-05-28-11-40-58-177_com.tencent.mm.jpg
- 基于Dart的Flutter小提琴调音器APP设计源码 - violinhelper
- 基于JavaScript和CSS的随寻订购网页设计源码 - web-order
- 基于MATLAB的声纹识别系统设计源码 - VoiceprintRecognition
- 基于Java的微服务插件集合设计源码 - wsy-plugins
- 基于Vue和微信小程序的监理日志系统设计源码 - supervisionLog
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈