Vary C¶

This notebook runs the `vary_C` condition of the LSTM time series study: for each SNR level it loads the corresponding train/test data, trains an LSTM model, and evaluates it on held-out parameter values. The workflow covers:

  • Data loading and preprocessing
  • Model configuration and training
  • Evaluation and visualization
In [1]:
import pandas as pd
import os, sys
from pathlib import Path

# Setup paths
# NOTE(review): hardcoded absolute path — this notebook only runs on this
# machine/user; consider an env var or a path relative to the notebook.
lstm_exp = Path("/home/ytli/research/lstm")
sys.path.append(str(lstm_exp))

# Import custom modules
from modules.plot import plot_all_subjects_combined, plot_time_series_by_subject
from modules.study import TimeSeriesAnalysis
from modules.data import snr_db_to_noise_std


# Silence per-run Lightning device banners and MLflow info chatter so the
# training loop's own progress prints stay readable.
import logging
logging.getLogger("pytorch_lightning.utilities.rank_zero").setLevel(logging.WARNING)
logging.getLogger("pytorch_lightning.accelerators.cuda").setLevel(logging.WARNING)
logging.getLogger("mlflow").setLevel(logging.ERROR)

Train¶

In [2]:
# Load the experiment grid (one row per condition x SNR) and display it.
conditions_noise_df = pd.read_csv(lstm_exp / 'conditions_noise.csv')
conditions_noise_df
Out[2]:
Condition Difference SNR A (train) B (train) C (train) D (train) E (test) F (test) G (test) H (test)
0 vary_A 1.0 20 [2.0, 3.0, 4.0] 1 0 0 [1.0, 5.0] 1 0 0
1 vary_B 1.0 20 1 [1.5, 2.5, 3.5] 0 0 1 [0.5, 4.5] 0 0
2 vary_C 0.5 20 1 1 [-0.5, 0.0, 0.5] 0 1 1 [-1.0, 1.0] 0
3 vary_D 2.0 20 1 1 0 [-2.0, 0.0, 2.0] 1 1 0 [-4.0, 4.0]
4 vary_A 1.0 10 [2.0, 3.0, 4.0] 1 0 0 [1.0, 5.0] 1 0 0
5 vary_B 1.0 10 1 [1.5, 2.5, 3.5] 0 0 1 [0.5, 4.5] 0 0
6 vary_C 0.5 10 1 1 [-0.5, 0.0, 0.5] 0 1 1 [-1.0, 1.0] 0
7 vary_D 2.0 10 1 1 0 [-2.0, 0.0, 2.0] 1 1 0 [-4.0, 4.0]
8 vary_A 1.0 5 [2.0, 3.0, 4.0] 1 0 0 [1.0, 5.0] 1 0 0
9 vary_B 1.0 5 1 [1.5, 2.5, 3.5] 0 0 1 [0.5, 4.5] 0 0
10 vary_C 0.5 5 1 1 [-0.5, 0.0, 0.5] 0 1 1 [-1.0, 1.0] 0
11 vary_D 2.0 5 1 1 0 [-2.0, 0.0, 2.0] 1 1 0 [-4.0, 4.0]
In [3]:
study_name = "study4"
folder_name = "vary_C"

# Each condition varies exactly one parameter; map it to the (train, test)
# columns holding the varied values. Replaces four copy-pasted if/elif
# branches that differed only in column names and the parameter letter.
varied_columns = {
    "vary_A": ("A (train)", "E (test)"),
    "vary_B": ("B (train)", "F (test)"),
    "vary_C": ("C (train)", "G (test)"),
    "vary_D": ("D (train)", "H (test)"),
}

# Run one full train/evaluate cycle per SNR level of the chosen condition.
sub_condition_df = conditions_noise_df[conditions_noise_df['Condition'] == folder_name]
for index, row in sub_condition_df.iterrows():
    difference_value = row['Difference']
    snr = row['SNR']
    # NOTE(review): for vary_A rows, 'A (train)' holds a list-like string
    # (e.g. "[2.0, 3.0, 4.0]") and float() would raise ValueError; this only
    # works here because vary_C rows store a scalar ("1"). Confirm before
    # reusing this cell for the vary_A condition.
    A = float(row['A (train)'])
    noise_std = snr_db_to_noise_std(snr, A)

    print("="*80)
    print(f"\033[1m\033[94m>>> CONDITION: {row['Condition']}, DIFFERENCE VALUE: {difference_value}, SNR: {snr}, NOISE_VAR: {noise_std**2} <<<\033[0m")
    print("-"*80)

    # Report which parameter values the model is trained vs. tested on.
    condition = row['Condition']
    if condition in varied_columns:
        train_col, test_col = varied_columns[condition]
        param = condition.split("_")[1]  # e.g. "C" from "vary_C"
        trained_on = row[train_col]
        test_on = row[test_col]
        print(f"\033[1m\033[92mTRAINING SET:\033[0m {param} values in {trained_on}")
        print(f"\033[1m\033[93mTEST SET:\033[0m {param} values in {test_on}")
    print("-"*80)

    # Initialize the analysis for this (condition, difference, SNR) setting.
    analysis = TimeSeriesAnalysis(experiment_name="test3", study_name=study_name, folder_name=folder_name, difference_value=difference_value, snr=snr)

    # Load train and test splits, then visualize all subjects together.
    df, subject_info_df = analysis.load_data()
    test_df, test_subject_info_df = analysis.load_test_data()

    all_df = pd.concat([df, test_df], ignore_index=True)
    all_subject_info_df = pd.concat([subject_info_df, test_subject_info_df], ignore_index=True)
    plot_time_series_by_subject(all_df, all_subject_info_df).show()

    # Build windowed datasets; the third loader from prepare_datasets is
    # deliberately unused — the held-out test loader comes from the separate
    # test split via prepare_test_datasets.
    sliding_windows_dict = analysis.create_sliding_windows(df)
    sliding_windows_dict_test = analysis.create_sliding_windows(test_df)
    train_loader, val_loader, _not_used_test_loader = analysis.prepare_datasets(sliding_windows_dict)
    test_loader = analysis.prepare_test_datasets(sliding_windows_dict_test)

    # Train model and report checkpoint/test losses.
    print("\033[1m\033[95mTraining model...\033[0m")
    best_model, best_model_checkpoint_metrics, test_results = analysis.train_model(train_loader, val_loader, test_loader)
    print(f"\033[1m\033[96mBEST MODEL VAL LOSS:\033[0m {best_model_checkpoint_metrics['val_loss']:.5f}")
    print(f"\033[1m\033[96mTEST RESULTS:\033[0m loss = {test_results[0]['test_loss']:.5f}")

    # Evaluate and visualize predictions on the held-out parameter values.
    print("\033[1m\033[95mEvaluating model on test data...\033[0m")
    test_eval_data = analysis.evaluate_model(best_model, test_loader)
    plot_all_subjects_combined(test_eval_data).show()
    print("="*80)
================================================================================
>>> CONDITION: vary_C, DIFFERENCE VALUE: 0.5, SNR: 20, NOISE_VAR: 0.005 <<<
--------------------------------------------------------------------------------
TRAINING SET: C values in [-0.5, 0.0, 0.5]
TEST SET: C values in [-1.0, 1.0]
--------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_C_difference_0.5 at: http://localhost:8093/#/experiments/91/runs/d551b2bf91224e4c9ada5f8d04f07e0a
🧪 View experiment at: http://localhost:8093/#/experiments/91
BEST MODEL VAL LOSS: 0.01092
TEST RESULTS: loss = 0.00889
Evaluating model on test data...
================================================================================
================================================================================
>>> CONDITION: vary_C, DIFFERENCE VALUE: 0.5, SNR: 10, NOISE_VAR: 0.049999999999999996 <<<
--------------------------------------------------------------------------------
TRAINING SET: C values in [-0.5, 0.0, 0.5]
TEST SET: C values in [-1.0, 1.0]
--------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_C_difference_0.5 at: http://localhost:8093/#/experiments/91/runs/3b3da3314c3f42dbbe8347614b9eaee5
🧪 View experiment at: http://localhost:8093/#/experiments/91
BEST MODEL VAL LOSS: 0.07044
TEST RESULTS: loss = 0.07487
Evaluating model on test data...
================================================================================
================================================================================
>>> CONDITION: vary_C, DIFFERENCE VALUE: 0.5, SNR: 5, NOISE_VAR: 0.15811388300841894 <<<
--------------------------------------------------------------------------------
TRAINING SET: C values in [-0.5, 0.0, 0.5]
TEST SET: C values in [-1.0, 1.0]
--------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_C_difference_0.5 at: http://localhost:8093/#/experiments/91/runs/782527c2966b40cf8520c685df0c635b
🧪 View experiment at: http://localhost:8093/#/experiments/91
BEST MODEL VAL LOSS: 0.21017
TEST RESULTS: loss = 0.20039
Evaluating model on test data...
================================================================================