#!/usr/bin/python
# -*- coding: UTF-8 -*-
import pandas as pd
import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.classifiers import Classifier
from weka.classifiers import FilteredClassifier
from weka.classifiers import Evaluation
from weka.core.classes import Random
import weka.core.serialization as serialization
import os
def TrainingModel(csv, modelOutput_1,modelOutput_2,modelOutput_3,modelOutput_4,modelOutput_5):
    """Train five Weka classifiers on one CSV file and save each of them.

    The CSV is read with Weka's CSVLoader and its last column is declared
    the class attribute. The classifiers are then built and serialized one
    after another, in this order: Logistic, NaiveBayes, IBk, REPTree, SMO.

    Args:
        csv: Path of the training CSV file.
        modelOutput_1: Output file for the Logistic model.
        modelOutput_2: Output file for the NaiveBayes model.
        modelOutput_3: Output file for the IBk model.
        modelOutput_4: Output file for the REPTree model.
        modelOutput_5: Output file for the SMO model.
    """
    # Load the training set.
    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    training_set = csv_loader.load_file(csv)
    # Weka must be told where the class attribute is: here, the last column.
    training_set.class_is_last()

    # Each Weka class name is paired with the file that receives its model.
    algorithm_targets = (
        ("weka.classifiers.functions.Logistic", modelOutput_1),
        ("weka.classifiers.bayes.NaiveBayes", modelOutput_2),
        ("weka.classifiers.lazy.IBk", modelOutput_3),
        ("weka.classifiers.trees.REPTree", modelOutput_4),
        ("weka.classifiers.functions.SMO", modelOutput_5),
    )
    for weka_class, model_path in algorithm_targets:
        model = Classifier(classname=weka_class)
        model.build_classifier(training_set)
        # Save the model; the training data is passed as the `header` argument.
        model.serialize(model_path, header=training_set)
def TrainingRegressionModel(csv, modelOutput_1,modelOutput_2,modelOutput_3,modelOutput_4,modelOutput_5):
    """Train five Weka regression models on one CSV file and save each of them.

    The CSV is read with Weka's CSVLoader and its last column is declared
    the class (target) attribute. Models are built and serialized in this
    order: LinearRegression, SMOreg, IBk, REPTree, MultilayerPerceptron.

    Args:
        csv: Path of the training CSV file.
        modelOutput_1: Output file for the LinearRegression model.
        modelOutput_2: Output file for the SMOreg model.
        modelOutput_3: Output file for the IBk model.
        modelOutput_4: Output file for the REPTree model.
        modelOutput_5: Output file for the MultilayerPerceptron model.
    """
    # Load the training set.
    source = Loader(classname="weka.core.converters.CSVLoader")
    instances = source.load_file(csv)
    # Weka must be told where the class attribute is: here, the last column.
    instances.class_is_last()

    learner_names = (
        "weka.classifiers.functions.LinearRegression",
        "weka.classifiers.functions.SMOreg",
        "weka.classifiers.lazy.IBk",
        "weka.classifiers.trees.REPTree",
        "weka.classifiers.functions.MultilayerPerceptron",
    )
    destinations = (
        modelOutput_1,
        modelOutput_2,
        modelOutput_3,
        modelOutput_4,
        modelOutput_5,
    )
    # Train and save strictly one model at a time, in the listed order.
    for learner_name, destination in zip(learner_names, destinations):
        regressor = Classifier(classname=learner_name)
        regressor.build_classifier(instances)
        # Save the model; the training data is passed as the `header` argument.
        regressor.serialize(destination, header=instances)
def TestClassification(csv, title, true_title, modelInput, results):
    """Score a saved Weka classifier on a CSV test set and export its predictions.

    The test CSV is read twice: once with pandas (to hold the output table)
    and once with Weka's CSVLoader (to feed the classifier). The predicted
    label of every instance is written to column ``title``; the accuracy,
    as a percentage, is printed; the table is then written to ``results``.

    A prediction counts as correct when ``prediction == "True"`` equals the
    value stored in column ``true_title``.
    NOTE(review): this presumes ``true_title`` holds booleans and the class
    labels are "True"/"False" -- confirm against the data files.

    Args:
        csv: Path of the test CSV file; its last column is the class.
        title: Name of the column that receives the predicted labels.
        true_title: Name of the column holding the ground truth.
        modelInput: Path of the serialized model file.
        results: Path of the CSV file to write.
    """
    data = pd.read_csv(csv)
    # len() works for every index type; `.index.stop` exists only on RangeIndex.
    total = len(data)
    correct_num = 0

    # Load the model under test: the classifier is the first serialized object.
    objects = serialization.read_all(modelInput)
    clsf = Classifier(jobject=objects[0])

    # Load the test set for Weka.
    loader = Loader(classname="weka.core.converters.CSVLoader")
    test = loader.load_file(csv)
    test.class_is_last()

    # Classify each instance and record the result.
    for index, inst in enumerate(test):
        pred = clsf.classify_instance(inst)
        # The numeric prediction is used as an index into the class labels.
        prediction = inst.class_attribute.value(int(pred))
        data.loc[index, title] = prediction
        is_true = prediction == str(True)
        if is_true == data.loc[index, true_title]:
            correct_num += 1

    # Force float division (Python 2 would truncate) and avoid dividing by
    # zero on an empty test set, where the accuracy is undefined.
    accuracy = float(correct_num) / total if total else float("nan")
    print(accuracy * 100)
    data.to_csv(results, index=False, sep=',')
def TestRegression(csv, title, true_title, modelInput, results):
    """Score a saved Weka regression model on a CSV test set and export its predictions.

    The test CSV is read twice: once with pandas (to hold the output table)
    and once with Weka's CSVLoader (to feed the model). The prediction for
    every instance is written to column ``title``; the mean absolute error
    and root mean squared error against column ``true_title`` are printed;
    the table is then written to ``results``.

    Args:
        csv: Path of the test CSV file; its last column is the target.
        title: Name of the column that receives the predicted values.
        true_title: Name of the column holding the true values.
        modelInput: Path of the serialized model file.
        results: Path of the CSV file to write.
    """
    data = pd.read_csv(csv)
    # len() works for every index type; `.index.stop` exists only on RangeIndex.
    total = len(data)

    # Load the model under test: the model is the first serialized object.
    objects = serialization.read_all(modelInput)
    clsf = Classifier(jobject=objects[0])

    # Load the test set for Weka.
    loader = Loader(classname="weka.core.converters.CSVLoader")
    test = loader.load_file(csv)
    test.class_is_last()

    # Predict each instance and accumulate the error sums.
    abs_error_sum = 0
    squared_error_sum = 0
    for index, inst in enumerate(test):
        pred = clsf.classify_instance(inst)
        data.loc[index, title] = pred
        error = abs(data.loc[index, true_title] - pred)
        abs_error_sum = abs_error_sum + error
        squared_error_sum = squared_error_sum + error ** 2

    if total:
        MAE = abs_error_sum / total
        RMSE = (squared_error_sum / total) ** 0.5
    else:
        # Both metrics are undefined without test rows; avoid dividing by zero.
        MAE = float("nan")
        RMSE = float("nan")
    print("MAE : " + str(MAE) + " and RMSE : " + str(RMSE))
    data.to_csv(results, index=False, sep=',')
# One entry per classification dataset:
# (training CSV, test CSV, ground-truth column, model files, result files).
# The model and result files are listed in the order of the five classifiers.
_CLASSIFICATION_RUNS = (
    (
        "Classification_train_1.csv",
        "Classification_test_1.csv",
        "aging_1",
        ("model_1_1.model", "model_1_2.model", "model_1_3.model", "model_1_4.model", "model_1_5.model"),
        (
            "./result/result_1_1.csv",
            "./result/result_1_2.csv",
            "./result/result_1_3.csv",
            "./result/result_1_4.csv",
            "./result/result_1_5.csv",
        ),
    ),
    (
        "Classification_train_2.csv",
        "Classification_test_2.csv",
        "aging_2",
        ("model_2_1.model", "model_2_2.model", "model_2_3.model", "model_2_4.model", "model_2_5.model"),
        (
            "./result/result_2_1.csv",
            "./result/result_2_2.csv",
            "./result/result_2_3.csv",
            "./result/result_2_4.csv",
            "./result/result_2_5.csv",
        ),
    ),
    (
        "Classification_train_3.csv",
        "Classification_test_3.csv",
        "aging_3",
        ("model_3_1.model", "model_3_2.model", "model_3_3.model", "model_3_4.model", "model_3_5.model"),
        (
            "./result/result_3_1.csv",
            "./result/result_3_2.csv",
            "./result/result_3_3.csv",
            "./result/result_3_4.csv",
            "./result/result_3_5.csv",
        ),
    ),
)

# Make sure the directory that receives the result CSV files exists.
folder = os.path.exists("./result")
if not folder:
    os.makedirs("./result")

# Start the Java virtual machine; the Weka calls below run inside it.
jvm.start()

# For each dataset: train all five classifiers, then score every saved model
# on that dataset's test file, in the same order the models were trained.
for _train_csv, _test_csv, _truth_column, _model_files, _result_files in _CLASSIFICATION_RUNS:
    TrainingModel(_train_csv, *_model_files)
    for _model_file, _result_file in zip(_model_files, _result_files):
        TestClassification(_test_csv, "predicated_aging", _truth_column, _model_file, _result_file)
# Regression
# Train the five regression models on the first regression training set; each
# model is saved to its own Regression_model_1_<k>.model file.
TrainingRegressionModel("Regression_train_1.csv", "Regression_model_1_1.model", "Regression_model_1_2.model", "Regression_model_1_3.model", "Regression_model_1_4.model", "Regression_model_1_5.model")
# Score the saved models one at a time on the same test CSV. Each call prints
# the MAE and RMSE and writes the predictions to a CSV under ./result.
TestRegression("Regression_test_1.csv", "predicated_time", "crash_time_1", "Regression_model_1_1.model", "./result/Regression_result_1_1.csv")
TestRegression("Regression_test_1.csv", "predicated_time", "crash_time_1", "Regression_model_1_2.model", "./result/Regression_result_1_2.csv")
TestRegression("Regression_test_1.csv", "predicated_time", "crash_time_1", "Regression_model_1_3.model", "./result/Regression_result_1_3.csv")
TestRegression("Regression_test_1.csv", "predicated_time", "crash_time_1", "Regression_model_1_4.model", "./result/Regression_result_1_4.csv")
TestRegression("Regression_test_1.csv", "predicated_time", "crash_time_1", "Regression_model_1_5.model"