LOF.rar_LOF_lof算法资源-CSDN文库

共2个文件

py：2个

版权申诉

LOF（Local Outlier Factor，局部离群因子）算法

148 浏览量 2022-09-24 10:21:39 上传评论收藏 2KB RAR 举报

资源详情

资源评论

资源推荐

收起资源包目录

LOF.rar （2个子文件）

LOF.py 2KB

iForest.py 3KB

# Isolation Forest outlier-detection demo on synthetic 2-D data.
#
# The script builds two tight Gaussian clusters as "normal" data, fits an
# IsolationForest on them, prints the predicted label of every regular test
# point (1 = inlier, -1 = outlier) and finally draws the decision-function
# landscape together with the training, test and outlier samples.

import numpy as np

# One seed drives both the synthetic data and the forest so that every run
# prints the same labels and draws the same picture.
SEED = 100

# The contour grid spans [-GRID_LIMIT, GRID_LIMIT] on both axes.
GRID_LIMIT = 10
GRID_STEPS = 50


def make_datasets(seed=SEED):
    """Generate the training, regular-test and outlier samples.

    Args:
        seed: Seed for the ``numpy.random.RandomState`` that produces all
            three arrays, in this order: train, test, outliers.

    Returns:
        A tuple ``(X_train, X_test, X_outliers)`` of float arrays with
        shapes ``(200, 2)``, ``(40, 2)`` and ``(20, 2)``.
    """
    rng = np.random.RandomState(seed)

    # 100 points around the origin, copied to (+2, +2) and (-2, -2);
    # np.r_ stacks the two shifted copies along the first axis.
    base = 0.3 * rng.randn(100, 2)
    X_train = np.r_[base + 2, base - 2]

    # Fresh "regular" observations drawn the same way as the training data.
    base = 0.3 * rng.randn(20, 2)
    X_test = np.r_[base + 2, base - 2]

    # Abnormal observations: uniform over the square [-5, 5) x [-5, 5).
    X_outliers = rng.uniform(low=-5, high=5, size=(20, 2))

    return X_train, X_test, X_outliers


def make_grid(limit=GRID_LIMIT, steps=GRID_STEPS):
    """Return the ``(xx, yy)`` mesh used to evaluate the decision function.

    Args:
        limit: Half-width of the square grid; each axis covers
            ``[-limit, limit]``.
        steps: Number of evenly spaced samples per axis.

    Returns:
        Two ``(steps, steps)`` coordinate arrays from ``numpy.meshgrid``.
    """
    axis = np.linspace(-limit, limit, steps)
    return np.meshgrid(axis, axis)


def fit_detector(X_train, random_state=SEED):
    """Fit an IsolationForest on ``X_train`` and return the fitted model.

    Args:
        X_train: Array of shape ``(n_samples, 2)`` with the normal data.
        random_state: Seed (or RandomState) forwarded to the forest. The
            original script passed ``None``, which made the predictions and
            the plot change from run to run even though the data was seeded.

    Returns:
        The fitted ``sklearn.ensemble.IsolationForest`` instance.
    """
    # Imported here so the data helpers stay usable with NumPy alone.
    from sklearn.ensemble import IsolationForest

    clf = IsolationForest(
        n_estimators=100,
        max_samples='auto',
        contamination=0.1,
        max_features=1.,
        bootstrap=True,
        n_jobs=1,
        random_state=random_state,
        verbose=0,
    )
    clf.fit(X_train)
    return clf


def report_predictions(points, labels):
    """Print each sample next to its predicted label (1 inlier, -1 outlier)."""
    for point, label in zip(points, labels):
        print(point, label)


def plot_decision_regions(clf, X_train, X_test, X_outliers):
    """Draw the decision-function contours and the three sample sets.

    Args:
        clf: Fitted detector exposing ``decision_function``.
        X_train: Training samples, drawn in white.
        X_test: Regular test samples, drawn in green.
        X_outliers: Abnormal samples, drawn in red.
    """
    # Imported here so the script can be used without matplotlib when the
    # visualisation step is skipped.
    import matplotlib.pyplot as plt

    xx, yy = make_grid()
    # np.c_ pairs the flattened grid coordinates column-wise into (N, 2)
    # points; the scores are then folded back into the grid's shape.
    # Per the scikit-learn docs, lower scores mean more abnormal.
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.title("IsolationForest")
    plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)

    b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20,
                     edgecolor='k')
    b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20,
                     edgecolor='k')
    c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20,
                    edgecolor='k')

    plt.axis('tight')
    plt.xlim((-10, 10))
    plt.ylim((-10, 10))
    plt.legend([b1, b2, c],
               ["training observations",
                "new regular observations",
                "new abnormal observations"],
               loc="upper left")
    plt.show()


def main():
    """Run the demo: build data, fit, print test labels, show the plot."""
    X_train, X_test, X_outliers = make_datasets()

    # Total element counts (rows * columns), as in the original output.
    print(X_train.size)
    print(X_test.size)
    print(X_outliers.size)

    clf = fit_detector(X_train)

    # Classify the regular test points; they resemble the training data.
    y_pred_test = clf.predict(X_test)
    report_predictions(X_test, y_pred_test)

    # Everything below is visualisation only.
    plot_decision_regions(clf, X_train, X_test, X_outliers)


if __name__ == "__main__":
    main()