import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
from google.colab import files
# 1. Upload the file (run in Colab)
print("Please upload training_results.json:")
uploaded = files.upload()

# 2. Load the data
with open('training_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
# Extract the fields
history = data['history']
cm = np.array(data['confusion_matrix'])
class_names = data['class_names']
classification_report_dict = data['classification_report']
true_classes = np.array(data['true_classes'])
predicted_classes = np.array(data['predicted_classes'])
predictions_proba = np.array(data['predictions_proba'])

print(f"Number of classes: {len(class_names)}")
print(f"Class names: {class_names}")
print(f"Number of validation samples: {len(true_classes)}")
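# For reference, the expected shape of training_results.json, inferred from
# the keys read above (exact array shapes depend on the exporting script):
# {
#   "history": {"accuracy": [...], "val_accuracy": [...],
#               "loss": [...], "val_loss": [...]},
#   "confusion_matrix": [[...], ...],   # n_classes x n_classes counts
#   "class_names": ["...", ...],
#   "classification_report": {...},     # sklearn output_dict=True format
#   "true_classes": [...],              # integer labels, length n_samples
#   "predicted_classes": [...],
#   "predictions_proba": [[...], ...]   # n_samples x n_classes
# }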
# 3. Plot the training history
plt.figure(figsize=(15, 5))

# Accuracy curves
plt.subplot(1, 2, 1)
plt.plot(history['accuracy'], label='Training Accuracy', marker='o')
plt.plot(history['val_accuracy'], label='Validation Accuracy', marker='s')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

# Loss curves
plt.subplot(1, 2, 2)
plt.plot(history['loss'], label='Training Loss', marker='o')
plt.plot(history['val_loss'], label='Validation Loss', marker='s')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
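# Optional: to keep a copy of any of these figures, save it before plt.show(),
# e.g. plt.savefig('training_history.png', dpi=150, bbox_inches='tight')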
# 4. Confusion matrix heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(cm,
            annot=True,
            fmt='d',
            cmap='Greens',
            xticklabels=class_names,
            yticklabels=class_names,
            cbar_kws={'label': 'Count'})
plt.title('Confusion Matrix', fontsize=16, pad=20)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()
# 5. Normalized confusion matrix (each row sums to 1)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
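# Caveat: if a class has zero true samples, its row sum is 0 and the division
# above yields NaN. A guarded alternative (assuming empty rows should become
# all-zero proportions):
# row_sums = cm.sum(axis=1, keepdims=True)
# cm_normalized = np.divide(cm.astype(float), row_sums,
#                           out=np.zeros_like(cm, dtype=float),
#                           where=row_sums != 0)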
plt.figure(figsize=(12, 10))
sns.heatmap(cm_normalized,
            annot=True,
            fmt='.2f',
            cmap='Greens',
            xticklabels=class_names,
            yticklabels=class_names,
            cbar_kws={'label': 'Proportion'})
plt.title('Normalized Confusion Matrix', fontsize=16, pad=20)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()
# 6. Per-class performance analysis
# Convert the classification report to a DataFrame
cr_df = pd.DataFrame(classification_report_dict).transpose()
cr_df = cr_df.iloc[:-3]  # drop the accuracy, macro avg, and weighted avg rows
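# Optional: print the numeric table behind the bar chart below
print(cr_df.round(3))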
# Plot per-class precision, recall, and F1
plt.figure(figsize=(15, 8))
metrics = ['precision', 'recall', 'f1-score']
x = np.arange(len(class_names))
width = 0.25

for i, metric in enumerate(metrics):
    plt.bar(x + i * width, cr_df[metric], width, label=metric.capitalize(), alpha=0.8)

plt.xlabel('Classes')
plt.ylabel('Score')
plt.title('Classification Performance by Class')
plt.xticks(x + width, class_names, rotation=45, ha='right')  # center ticks on the middle bar
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# 7. Per-class accuracy analysis
class_accuracy = cm.diagonal() / cm.sum(axis=1)
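# Note: diagonal count / row sum is each class's recall (the fraction of that
# class's true samples predicted correctly); "per-class accuracy" here means
# exactly that.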
plt.figure(figsize=(12, 6))
bars = plt.bar(class_names, class_accuracy, alpha=0.7, color='skyblue')
plt.xlabel('Classes')
plt.ylabel('Accuracy')
plt.title('Per-Class Accuracy')
plt.xticks(rotation=45, ha='right')
plt.grid(True, alpha=0.3)

# Annotate each bar with its accuracy value
for bar, acc in zip(bars, class_accuracy):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
             f'{acc:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()
# 8. Prediction confidence analysis
confidence_scores = np.max(predictions_proba, axis=1)

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.hist(confidence_scores, bins=30, alpha=0.7, color='lightgreen')
plt.xlabel('Confidence Score')
plt.ylabel('Frequency')
plt.title('Distribution of Prediction Confidence')
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
correct_predictions = (true_classes == predicted_classes)
plt.boxplot([confidence_scores[correct_predictions], confidence_scores[~correct_predictions]],
            labels=['Correct', 'Incorrect'])
plt.ylabel('Confidence Score')
plt.title('Confidence Score by Prediction Correctness')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
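# Optional numeric companion to the plots above: a well-calibrated model shows
# a clear confidence gap between correct and incorrect predictions.
print(f"Mean confidence (correct): {confidence_scores[correct_predictions].mean():.3f}")
if (~correct_predictions).any():
    print(f"Mean confidence (incorrect): {confidence_scores[~correct_predictions].mean():.3f}")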
# 9. Print a performance summary
print("\n" + "=" * 50)
print("PERFORMANCE SUMMARY")
print("=" * 50)

total_accuracy = np.sum(cm.diagonal()) / np.sum(cm)
print(f"Overall Accuracy: {total_accuracy:.4f}")
print(f"Final Training Accuracy: {history['accuracy'][-1]:.4f}")
print(f"Final Validation Accuracy: {history['val_accuracy'][-1]:.4f}")
print(f"Best Validation Accuracy: {max(history['val_accuracy']):.4f}")

print("\nPer-class Accuracy:")
for class_name, acc in zip(class_names, class_accuracy):
    print(f"  {class_name}: {acc:.4f}")

print("\nWorst performing classes:")
worst_classes = np.argsort(class_accuracy)[:3]
for idx in worst_classes:
    print(f"  {class_names[idx]}: {class_accuracy[idx]:.4f}")

print("\nBest performing classes:")
best_classes = np.argsort(class_accuracy)[-3:]
for idx in reversed(best_classes):
    print(f"  {class_names[idx]}: {class_accuracy[idx]:.4f}")
# 10. Confusion analysis: the most frequently confused class pairs
print("\nMost confused class pairs:")
confusion_pairs = []
for i in range(len(class_names)):
    for j in range(len(class_names)):
        if i != j and cm[i, j] > 0:
            confusion_pairs.append((class_names[i], class_names[j], cm[i, j]))

confusion_pairs.sort(key=lambda x: x[2], reverse=True)
for true_class, pred_class, count in confusion_pairs[:5]:
    print(f"  {true_class} → {pred_class}: {count} times")

print("\nVisualization complete!")