# 원격 우분투에서 JSON으로 받아 시각화 #또뜨
# (Receive training results as JSON from a remote Ubuntu machine and visualize them in Colab.)

import json

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.metrics import confusion_matrix, classification_report

import pandas as pd

from google.colab import files

# 1. File upload (run inside Google Colab).
# files.upload() opens a browser file picker and returns {filename: bytes}.
print("training_results.json 파일을 업로드하세요:")

uploaded = files.upload()

# 2. Load the uploaded results file.
with open('training_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Extract the individual result fields exported by the training run.
history = data['history']                          # per-epoch metric lists
cm = np.array(data['confusion_matrix'])            # (n_classes, n_classes) counts
class_names = data['class_names']
classification_report_dict = data['classification_report']
true_classes = np.array(data['true_classes'])
predicted_classes = np.array(data['predicted_classes'])
predictions_proba = np.array(data['predictions_proba'])  # (n_samples, n_classes)

print(f"클래스 수: {len(class_names)}")
print(f"클래스 이름: {class_names}")
print(f"검증 샘플 수: {len(true_classes)}")

# 3. Training-history visualization: accuracy and loss side by side.
plt.figure(figsize=(15, 5))

# Accuracy curves (training vs. validation).
plt.subplot(1, 2, 1)
plt.plot(history['accuracy'], label='Training Accuracy', marker='o')
plt.plot(history['val_accuracy'], label='Validation Accuracy', marker='s')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

# Loss curves (training vs. validation).
plt.subplot(1, 2, 2)
plt.plot(history['loss'], label='Training Loss', marker='o')
plt.plot(history['val_loss'], label='Validation Loss', marker='s')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 4. Confusion-matrix heatmap (raw counts).
plt.figure(figsize=(12, 10))
sns.heatmap(cm,
            annot=True,
            fmt='d',
            cmap='Greens',
            xticklabels=class_names,
            yticklabels=class_names,
            cbar_kws={'label': 'Count'})
plt.title('Confusion Matrix', fontsize=16, pad=20)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# 5. Row-normalized confusion matrix (each row sums to 1 = per-true-class proportions).
row_sums = cm.sum(axis=1)[:, np.newaxis]
# Guard against division by zero for classes with no validation samples.
cm_normalized = cm.astype('float') / np.where(row_sums == 0, 1, row_sums)

plt.figure(figsize=(12, 10))
sns.heatmap(cm_normalized,
            annot=True,
            fmt='.2f',
            cmap='Greens',
            xticklabels=class_names,
            yticklabels=class_names,
            cbar_kws={'label': 'Proportion'})
plt.title('Normalized Confusion Matrix', fontsize=16, pad=20)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# 6. Per-class performance analysis.
# Convert the classification report dict to a DataFrame (rows = classes + summary rows).
cr_df = pd.DataFrame(classification_report_dict).transpose()
cr_df = cr_df.iloc[:-3]  # drop the 'accuracy', 'macro avg', 'weighted avg' summary rows

# Grouped bar chart of precision / recall / f1 per class.
plt.figure(figsize=(15, 8))
metrics = ['precision', 'recall', 'f1-score']
x = np.arange(len(class_names))
width = 0.25

for i, metric in enumerate(metrics):
    plt.bar(x + i * width, cr_df[metric], width, label=metric.capitalize(), alpha=0.8)

plt.xlabel('Classes')
plt.ylabel('Score')
plt.title('Classification Performance by Class')
plt.xticks(x + width, class_names, rotation=45, ha='right')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# 7. Per-class accuracy (diagonal / row total of the confusion matrix).
row_totals = cm.sum(axis=1)
# Guard against division by zero for classes with no validation samples.
class_accuracy = cm.diagonal() / np.where(row_totals == 0, 1, row_totals)

plt.figure(figsize=(12, 6))
bars = plt.bar(class_names, class_accuracy, alpha=0.7, color='skyblue')
plt.xlabel('Classes')
plt.ylabel('Accuracy')
plt.title('Per-Class Accuracy')
plt.xticks(rotation=45, ha='right')
plt.grid(True, alpha=0.3)

# Annotate each bar with its accuracy value.
for bar, acc in zip(bars, class_accuracy):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
             f'{acc:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# 8. Prediction-confidence analysis (max softmax probability per sample).
confidence_scores = np.max(predictions_proba, axis=1)

plt.figure(figsize=(12, 6))

# Overall confidence distribution.
plt.subplot(1, 2, 1)
plt.hist(confidence_scores, bins=30, alpha=0.7, color='lightgreen')
plt.xlabel('Confidence Score')
plt.ylabel('Frequency')
plt.title('Distribution of Prediction Confidence')
plt.grid(True, alpha=0.3)

# Confidence split by correct vs. incorrect predictions.
plt.subplot(1, 2, 2)
correct_predictions = (true_classes == predicted_classes)
plt.boxplot([confidence_scores[correct_predictions],
             confidence_scores[~correct_predictions]],
            labels=['Correct', 'Incorrect'])
plt.ylabel('Confidence Score')
plt.title('Confidence Score by Prediction Correctness')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 9. Print a textual performance summary.
print("\n" + "=" * 50)
print("PERFORMANCE SUMMARY")
print("=" * 50)

total_accuracy = np.sum(cm.diagonal()) / np.sum(cm)
print(f"Overall Accuracy: {total_accuracy:.4f}")
print(f"Final Training Accuracy: {history['accuracy'][-1]:.4f}")
print(f"Final Validation Accuracy: {history['val_accuracy'][-1]:.4f}")
print(f"Best Validation Accuracy: {max(history['val_accuracy']):.4f}")

print("\nPer-class Accuracy:")
for class_name, acc in zip(class_names, class_accuracy):
    print(f" {class_name}: {acc:.4f}")

# Three lowest-accuracy classes.
print("\nWorst performing classes:")
worst_classes = np.argsort(class_accuracy)[:3]
for idx in worst_classes:
    print(f" {class_names[idx]}: {class_accuracy[idx]:.4f}")

# Three highest-accuracy classes, best first.
print("\nBest performing classes:")
best_classes = np.argsort(class_accuracy)[-3:]
for idx in reversed(best_classes):
    print(f" {class_names[idx]}: {class_accuracy[idx]:.4f}")

# 10. Most-confused class pairs (off-diagonal confusion-matrix entries, largest first).
print("\nMost confused class pairs:")
confusion_pairs = [
    (class_names[i], class_names[j], cm[i, j])
    for i in range(len(class_names))
    for j in range(len(class_names))
    if i != j and cm[i, j] > 0
]
confusion_pairs.sort(key=lambda pair: pair[2], reverse=True)

# Report the top 5 (true -> predicted) confusions.
for true_class, pred_class, count in confusion_pairs[:5]:
    print(f" {true_class} → {pred_class}: {count} times")

print("\n시각화 완료!")

# --- Blog-page residue below (WordPress comment-form boilerplate, not code) ---
# 코멘트
# 답글 남기기
# 이메일 주소는 공개되지 않습니다. 필수 필드는 *로 표시됩니다