[카테고리:] 미분류

  • 원격 우분투 서버에서 학습한 결과를 JSON으로 받아 Colab에서 시각화 #또뜨

    import json

    import numpy as np

    import matplotlib.pyplot as plt

    import seaborn as sns

    from sklearn.metrics import confusion_matrix, classification_report

    import pandas as pd

    from google.colab import files

    # 1. 파일 업로드 (Colab에서 실행)

    print(“training_results.json 파일을 업로드하세요:”)

    uploaded = files.upload()

    # 2. 데이터 로드

    with open(‘training_results.json’, ‘r’, encoding=’utf-8′) as f:

    data = json.load(f)

    # 데이터 추출

    history = data[‘history’]

    cm = np.array(data[‘confusion_matrix’])

    class_names = data[‘class_names’]

    classification_report_dict = data[‘classification_report’]

    true_classes = np.array(data[‘true_classes’])

    predicted_classes = np.array(data[‘predicted_classes’])

    predictions_proba = np.array(data[‘predictions_proba’])

    print(f”클래스 수: {len(class_names)}”)

    print(f”클래스 이름: {class_names}”)

    print(f”검증 샘플 수: {len(true_classes)}”)

    # 3. 학습 히스토리 시각화

    plt.figure(figsize=(15, 5))

    # 정확도 그래프

    plt.subplot(1, 2, 1)

    plt.plot(history[‘accuracy’], label=’Training Accuracy’, marker=’o’)

    plt.plot(history[‘val_accuracy’], label=’Validation Accuracy’, marker=’s’)

    plt.title(‘Model Accuracy’)

    plt.xlabel(‘Epoch’)

    plt.ylabel(‘Accuracy’)

    plt.legend()

    plt.grid(True, alpha=0.3)

    # 손실 그래프

    plt.subplot(1, 2, 2)

    plt.plot(history[‘loss’], label=’Training Loss’, marker=’o’)

    plt.plot(history[‘val_loss’], label=’Validation Loss’, marker=’s’)

    plt.title(‘Model Loss’)

    plt.xlabel(‘Epoch’)

    plt.ylabel(‘Loss’)

    plt.legend()

    plt.grid(True, alpha=0.3)

    plt.tight_layout()

    plt.show()

    # 4. Confusion Matrix 히트맵

    plt.figure(figsize=(12, 10))

    sns.heatmap(cm,

    annot=True,

    fmt=’d’,

    cmap=’Greens’,

    xticklabels=class_names,

    yticklabels=class_names,

    cbar_kws={‘label’: ‘Count’})

    plt.title(‘Confusion Matrix’, fontsize=16, pad=20)

    plt.xlabel(‘Predicted Label’, fontsize=12)

    plt.ylabel(‘True Label’, fontsize=12)

    plt.xticks(rotation=45, ha=’right’)

    plt.yticks(rotation=0)

    plt.tight_layout()

    plt.show()

    # 5. 정규화된 Confusion Matrix

    cm_normalized = cm.astype(‘float’) / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(12, 10))

    sns.heatmap(cm_normalized,

    annot=True,

    fmt=’.2f’,

    cmap=’Greens’,

    xticklabels=class_names,

    yticklabels=class_names,

    cbar_kws={‘label’: ‘Proportion’})

    plt.title(‘Normalized Confusion Matrix’, fontsize=16, pad=20)

    plt.xlabel(‘Predicted Label’, fontsize=12)

    plt.ylabel(‘True Label’, fontsize=12)

    plt.xticks(rotation=45, ha=’right’)

    plt.yticks(rotation=0)

    plt.tight_layout()

    plt.show()

    # 6. 클래스별 성능 분석

    # Classification Report를 DataFrame으로 변환

    cr_df = pd.DataFrame(classification_report_dict).transpose()

    cr_df = cr_df.iloc[:-3] # macro avg, weighted avg, accuracy 제외

    # 클래스별 성능 시각화

    plt.figure(figsize=(15, 8))

    metrics = [‘precision’, ‘recall’, ‘f1-score’]

    x = np.arange(len(class_names))

    width = 0.25

    for i, metric in enumerate(metrics):

    plt.bar(x + i*width, cr_df[metric], width, label=metric.capitalize(), alpha=0.8)

    plt.xlabel(‘Classes’)

    plt.ylabel(‘Score’)

    plt.title(‘Classification Performance by Class’)

    plt.xticks(x + width, class_names, rotation=45, ha=’right’)

    plt.legend()

    plt.grid(True, alpha=0.3)

    plt.tight_layout()

    plt.show()

    # 7. 클래스별 정확도 분석

    class_accuracy = cm.diagonal() / cm.sum(axis=1)

    plt.figure(figsize=(12, 6))

    bars = plt.bar(class_names, class_accuracy, alpha=0.7, color=’skyblue’)

    plt.xlabel(‘Classes’)

    plt.ylabel(‘Accuracy’)

    plt.title(‘Per-Class Accuracy’)

    plt.xticks(rotation=45, ha=’right’)

    plt.grid(True, alpha=0.3)

    # 막대 위에 정확도 값 표시

    for bar, acc in zip(bars, class_accuracy):

    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,

    f'{acc:.3f}’, ha=’center’, va=’bottom’)

    plt.tight_layout()

    plt.show()

    # 8. 예측 확신도 분석

    confidence_scores = np.max(predictions_proba, axis=1)

    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)

    plt.hist(confidence_scores, bins=30, alpha=0.7, color=’lightgreen’)

    plt.xlabel(‘Confidence Score’)

    plt.ylabel(‘Frequency’)

    plt.title(‘Distribution of Prediction Confidence’)

    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)

    correct_predictions = (true_classes == predicted_classes)

    plt.boxplot([confidence_scores[correct_predictions], confidence_scores[~correct_predictions]],

    labels=[‘Correct’, ‘Incorrect’])

    plt.ylabel(‘Confidence Score’)

    plt.title(‘Confidence Score by Prediction Correctness’)

    plt.grid(True, alpha=0.3)

    plt.tight_layout()

    plt.show()

    # 9. 성능 요약 출력

    print(“\n” + “=”*50)

    print(“PERFORMANCE SUMMARY”)

    print(“=”*50)

    total_accuracy = np.sum(cm.diagonal()) / np.sum(cm)

    print(f”Overall Accuracy: {total_accuracy:.4f}”)

    print(f”Final Training Accuracy: {history[‘accuracy’][-1]:.4f}”)

    print(f”Final Validation Accuracy: {history[‘val_accuracy’][-1]:.4f}”)

    print(f”Best Validation Accuracy: {max(history[‘val_accuracy’]):.4f}”)

    print(f”\nPer-class Accuracy:”)

    for i, (class_name, acc) in enumerate(zip(class_names, class_accuracy)):

    print(f” {class_name}: {acc:.4f}”)

    print(f”\nWorst performing classes:”)

    worst_classes = np.argsort(class_accuracy)[:3]

    for idx in worst_classes:

    print(f” {class_names[idx]}: {class_accuracy[idx]:.4f}”)

    print(f”\nBest performing classes:”)

    best_classes = np.argsort(class_accuracy)[-3:]

    for idx in reversed(best_classes):

    print(f” {class_names[idx]}: {class_accuracy[idx]:.4f}”)

    # 10. 클래스별 혼동 분석 (가장 많이 혼동되는 클래스 쌍)

    print(f”\nMost confused class pairs:”)

    confusion_pairs = []

    for i in range(len(class_names)):

    for j in range(len(class_names)):

    if i != j and cm[i, j] > 0:

    confusion_pairs.append((class_names[i], class_names[j], cm[i, j]))

    confusion_pairs.sort(key=lambda x: x[2], reverse=True)

    for true_class, pred_class, count in confusion_pairs[:5]:

    print(f” {true_class} → {pred_class}: {count} times”)

    print(“\n시각화 완료!”)