# Defect-type classification with SVM and kNN, explained with Kernel SHAP.
#
# Steps performed by this script:
#   1. Load Data_sim.csv and split it into training / validation sets.
#   2. Visualise the standardised training features with t-SNE.
#   3. Tune an SVM and a kNN pipeline with 5-fold grid search (accuracy).
#   4. Compute Kernel SHAP values for both tuned models and save, for each
#      class, a SHAP summary plot and a top-10 mean(|SHAP|) bar chart.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from sklearn import svm
from sklearn.manifold import TSNE
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

dataDir = r'./data'
outDir = r'./output'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(outDir, exist_ok=True)


def _class_shap_values(shap_values, class_idx):
    """Collect the SHAP values belonging to one class.

    Iterates over ``shap_values`` and takes column ``class_idx`` of every
    element, stacking the results into a single array.

    NOTE(review): this assumes each element of ``shap_values`` is a
    per-sample (n_features, n_classes) array, which is what recent shap
    releases return for ``predict_proba`` -- confirm for the installed
    version (older releases return a list with one array per class).
    """
    return np.array([sample[:, class_idx] for sample in shap_values])


def _save_show_clear(filename):
    """Save the current figure into ``outDir``, display it, then clear it."""
    plt.savefig(os.path.join(outDir, filename), dpi=300, bbox_inches='tight')
    plt.show()  # This displays the plot in the notebook
    plt.clf()  # Clears the figure after saving and displaying


def _summary_plot(shap_values, class_idx, model_name, features):
    """Draw and save the SHAP summary plot of one model for one class."""
    shap.summary_plot(_class_shap_values(shap_values, class_idx), features,
                      max_display=10, show=False)
    plt.title(f'{model_name} Class {class_idx}')
    _save_show_clear(f'SHAP_{model_name}_Class_{class_idx}.png')


def _top10_bar_plot(shap_values, class_idx, model_name, feature_names, color):
    """Draw and save the ten largest mean(|SHAP|) features for one class."""
    mean_abs = np.mean(np.abs(_class_shap_values(shap_values, class_idx)),
                       axis=0)
    top = np.argsort(mean_abs)[::-1][:10]  # indices, largest impact first
    plt.figure(figsize=(8, 6))
    # barh draws bottom-up, so reverse to put the largest bar on top.
    plt.barh(feature_names[top][::-1], mean_abs[top][::-1], color=color)
    plt.xlabel("mean(|SHAP value|) (average impact on model output magnitude)")
    plt.title(f"Top 10 SHAP Values for Features ({model_name} Class {class_idx})")
    plt.tight_layout()
    _save_show_clear(f'Top10_SHAP_Values_{model_name}_Class_{class_idx}.png')


# --------------------------------------------------------------------------
# Data loading and split
# --------------------------------------------------------------------------
data = pd.read_csv(os.path.join(dataDir, 'Data_sim.csv'))
X = data.iloc[:, 6:29]  # Select all features
y = data['Defect_type']

# Split data into training and validation sets
train_X, val_X, train_y, val_y = train_test_split(
    X, y, train_size=1600, test_size=400, random_state=42)

# --------------------------------------------------------------------------
# t-SNE visualisation of the training set
# --------------------------------------------------------------------------
scaler = StandardScaler()  # must be an instance, not the class
X_scaled = scaler.fit_transform(train_X)

# Perform t-SNE on the dataset
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X_scaled)  # X_scaled is the feature set after scaling

# Create a DataFrame with the t-SNE results and the labels
tsne_df = pd.DataFrame(X_tsne, columns=['TSNE1', 'TSNE2'])
tsne_df['Defect_type'] = train_y.values  # labels of the training rows

# Plot the t-SNE result
plt.figure(figsize=(10, 8))
sns.scatterplot(x='TSNE1', y='TSNE2', hue='Defect_type', data=tsne_df,
                palette='viridis')
plt.title('t-SNE Visualization of the Dataset')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.legend(title='Defect Type')
plt.tight_layout()
plt.show()

# --------------------------------------------------------------------------
# Model selection
# --------------------------------------------------------------------------
# Define SVM pipeline and hyperparameter grid
svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", svm.SVC(probability=True, decision_function_shape="ovo")),
])
svm_param_grid = {
    'svm__C': np.linspace(1, 100, 50),
    'svm__kernel': ['poly', 'rbf', 'sigmoid'],
}
svm_clf = GridSearchCV(svm_pipeline, param_grid=svm_param_grid, cv=5,
                       scoring='accuracy')
svm_clf.fit(train_X, train_y)
print('Best SVM score:', svm_clf.best_score_)

# Define kNN pipeline and hyperparameter grid
knn_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier()),
])
knn_param_grid = {'knn__n_neighbors': range(1, 11)}
knn_clf = GridSearchCV(knn_pipeline, param_grid=knn_param_grid, cv=5,
                       scoring='accuracy')
knn_clf.fit(train_X, train_y)
print('Best kNN score:', knn_clf.best_score_)

# Output recorded in the original notebook run:
#   Best SVM score: 0.999375
#   Best kNN score: 0.9981250000000002

# --------------------------------------------------------------------------
# SHAP computation
# --------------------------------------------------------------------------
evalPoints = 200
# NOTE(review): the slice starts at 1, so row 0 is skipped and
# evalPoints - 1 rows are explained -- confirm whether 0:evalPoints was meant.
eval_X = train_X.iloc[1:evalPoints, :]

# SHAP computation for SVM
svm_explainer = shap.KernelExplainer(svm_clf.predict_proba,
                                     shap.kmeans(train_X, 50))
svm_shap_values = svm_explainer.shap_values(eval_X.values)

# SHAP computation for kNN
knn_explainer = shap.KernelExplainer(knn_clf.predict_proba,
                                     shap.kmeans(train_X, 50))
knn_shap_values = knn_explainer.shap_values(eval_X.values)

# One plot set per class; taken from the fitted model instead of a literal 3.
n_classes = len(svm_clf.classes_)

# SHAP summary plots, per class, for both models
for i in range(n_classes):
    _summary_plot(svm_shap_values, i, 'SVM', eval_X)
    _summary_plot(knn_shap_values, i, 'kNN', eval_X)

# Top-10 mean(|SHAP|) bar charts, per class, for both models
for i in range(n_classes):
    _top10_bar_plot(svm_shap_values, i, 'SVM', train_X.columns, 'blue')
    _top10_bar_plot(knn_shap_values, i, 'kNN', train_X.columns, 'green')

# NOTE: the scraped page appended a truncated "summary" copy of the import
# lines at this point; it is not part of the script and is omitted here.
知乎学术咨询:
https://www.zhihu.com/consult/people/792359672131756032?isMe=1
担任《Mechanical Systems and Signal Processing》《中国电机工程学报》等期刊审稿专家,擅长领域:信号滤波/降噪,机器学习/深度学习,时间序列分析/预测,设备故障诊断/缺陷检测/异常检测。
分割线分割线分割线
Python环境下基于机器学习和深度学习的轴承故障诊断方法
算法程序运行环境为Python,采用tensorflow,keras和sklearn等模块,执行基于机器学习和深度学习模型的轴承故障诊断。主要内容包括:
[1]基于一维深度残差收缩网络DRSN的轴承故障诊断
[3]基于门控循环单元GRU,Inception网络,LSTM网络,随机森林的轴承故障诊断
所用模块版本:
tensorflow=2.8.0,keras=2.8.0,sklearn=1.0.2。完整代码可通过知乎付费咨询获得:
基于脉冲小波的旋转机械故障诊断(MATLAB)
基于一种改进熵方法的旋转机械故障诊断模型(MATLAB)
来源:科技大房车
免责声明:本站系转载,并不代表本网赞同其观点和对其真实性负责。如涉及作品内容、版权和其它问题,请在30日内与本站联系,我们将在第一时间删除内容!