Vary B¶
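This notebook runs the `vary_B` condition of an LSTM time series experiment: it trains one model per noise level (SNR) and evaluates it on held-out values of B. It relies on helper functions from the project's `modules` package for: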
- Data loading and preprocessing
- Model configuration and training
- Evaluation and visualization
In [1]:
import pandas as pd
import os, sys
from pathlib import Path
# Setup paths
# The project root defaults to the original location but can be overridden with
# the LSTM_EXP_DIR environment variable, so the notebook also runs on machines
# where the checkout lives elsewhere.
lstm_exp = Path(os.environ.get("LSTM_EXP_DIR", "/home/ytli/research/lstm"))
# Make the project's `modules` package importable. The membership check keeps
# sys.path from accumulating duplicate entries when this cell is re-run.
if str(lstm_exp) not in sys.path:
    sys.path.append(str(lstm_exp))
# Import custom modules
from modules.plot import plot_all_subjects_combined, plot_time_series_by_subject
from modules.study import TimeSeriesAnalysis
from modules.data import snr_db_to_noise_std
import logging
# Raise the threshold of chatty third-party loggers so that training output in
# the cells below stays readable.
for _logger_name, _min_level in (
    ("pytorch_lightning.utilities.rank_zero", logging.WARNING),
    ("pytorch_lightning.accelerators.cuda", logging.WARNING),
    ("mlflow", logging.ERROR),
):
    logging.getLogger(_logger_name).setLevel(_min_level)
Train¶
In [2]:
# Read the table of experimental conditions; each row pairs a condition
# (which parameter is varied) with an SNR level and its train/test values.
conditions_path = lstm_exp.joinpath('conditions_noise.csv')
conditions_noise_df = pd.read_csv(conditions_path)
# Bare last expression so the notebook renders the frame as a table.
conditions_noise_df
Out[2]:
| | Condition | Difference | SNR | A (train) | B (train) | C (train) | D (train) | E (test) | F (test) | G (test) | H (test) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | vary_A | 1.0 | 20 | [2.0, 3.0, 4.0] | 1 | 0 | 0 | [1.0, 5.0] | 1 | 0 | 0 | 
| 1 | vary_B | 1.0 | 20 | 1 | [1.5, 2.5, 3.5] | 0 | 0 | 1 | [0.5, 4.5] | 0 | 0 | 
| 2 | vary_C | 0.5 | 20 | 1 | 1 | [-0.5, 0.0, 0.5] | 0 | 1 | 1 | [-1.0, 1.0] | 0 | 
| 3 | vary_D | 2.0 | 20 | 1 | 1 | 0 | [-2.0, 0.0, 2.0] | 1 | 1 | 0 | [-4.0, 4.0] | 
| 4 | vary_A | 1.0 | 10 | [2.0, 3.0, 4.0] | 1 | 0 | 0 | [1.0, 5.0] | 1 | 0 | 0 | 
| 5 | vary_B | 1.0 | 10 | 1 | [1.5, 2.5, 3.5] | 0 | 0 | 1 | [0.5, 4.5] | 0 | 0 | 
| 6 | vary_C | 0.5 | 10 | 1 | 1 | [-0.5, 0.0, 0.5] | 0 | 1 | 1 | [-1.0, 1.0] | 0 | 
| 7 | vary_D | 2.0 | 10 | 1 | 1 | 0 | [-2.0, 0.0, 2.0] | 1 | 1 | 0 | [-4.0, 4.0] | 
| 8 | vary_A | 1.0 | 5 | [2.0, 3.0, 4.0] | 1 | 0 | 0 | [1.0, 5.0] | 1 | 0 | 0 | 
| 9 | vary_B | 1.0 | 5 | 1 | [1.5, 2.5, 3.5] | 0 | 0 | 1 | [0.5, 4.5] | 0 | 0 | 
| 10 | vary_C | 0.5 | 5 | 1 | 1 | [-0.5, 0.0, 0.5] | 0 | 1 | 1 | [-1.0, 1.0] | 0 | 
| 11 | vary_D | 2.0 | 5 | 1 | 1 | 0 | [-2.0, 0.0, 2.0] | 1 | 1 | 0 | [-4.0, 4.0] | 
In [3]:
# --- Run configuration -------------------------------------------------------
study_name = "study4"
folder_name = "vary_B"
# Passed to TimeSeriesAnalysis as `experiment_name`; presumably the experiment
# label used by the tracking backend -- confirm in modules.study.
experiment_name = "test3"

# For each condition: (letter of the varied parameter, column holding the
# training values, column holding the test values) in the conditions table.
split_columns = {
    "vary_A": ("A", "A (train)", "E (test)"),
    "vary_B": ("B", "B (train)", "F (test)"),
    "vary_C": ("C", "C (train)", "G (test)"),
    "vary_D": ("D", "D (train)", "H (test)"),
}

# Keep only the rows for this notebook's condition (one row per SNR level).
sub_condition_df = conditions_noise_df[conditions_noise_df['Condition'] == folder_name]

# Train and evaluate one model per selected row.
for index, row in sub_condition_df.iterrows():
    difference_value = row['Difference']
    snr = row['SNR']
    # NOTE: float() only succeeds when 'A (train)' holds a single number. That is
    # true for the vary_B/C/D rows; vary_A rows store a list such as
    # "[2.0, 3.0, 4.0]" there and would raise ValueError on this line.
    A = float(row['A (train)'])
    noise_std = snr_db_to_noise_std(snr, A)
    print("="*80)
    print(f"\033[1m\033[94m>>> CONDITION: {row['Condition']}, DIFFERENCE VALUE: {difference_value}, SNR: {snr}, NOISE_VAR: {noise_std**2} <<<\033[0m")
    print("-"*80)

    # Report which parameter is varied and the train/test values it takes.
    # Conditions missing from `split_columns` print nothing here, matching the
    # behaviour of the if/elif chain this table replaces.
    condition = row['Condition']
    split = split_columns.get(condition)
    if split is not None:
        varied_param, train_column, test_column = split
        trained_on = row[train_column]
        test_on = row[test_column]
        print(f"\033[1m\033[92mTRAINING SET:\033[0m {varied_param} values in {trained_on}")
        print(f"\033[1m\033[93mTEST SET:\033[0m {varied_param} values in {test_on}")
    print("-"*80)

    # Initialize the analysis
    analysis = TimeSeriesAnalysis(
        experiment_name=experiment_name,
        study_name=study_name,
        folder_name=folder_name,
        difference_value=difference_value,
        snr=snr,
    )

    # Load and prepare data
    df, subject_info_df = analysis.load_data()
    test_df, test_subject_info_df = analysis.load_test_data()

    # Plot training and test series together; the combined frames are used for
    # this plot only.
    all_df = pd.concat([df, test_df], ignore_index=True)
    all_subject_info_df = pd.concat([subject_info_df, test_subject_info_df], ignore_index=True)
    plot_time_series_by_subject(all_df, all_subject_info_df).show()

    sliding_windows_dict = analysis.create_sliding_windows(df)
    sliding_windows_dict_test = analysis.create_sliding_windows(test_df)
    # The third loader returned by prepare_datasets is discarded on purpose: the
    # test loader is built from the separately loaded test data instead.
    train_loader, val_loader, _not_used_test_loader = analysis.prepare_datasets(sliding_windows_dict)
    test_loader = analysis.prepare_test_datasets(sliding_windows_dict_test)

    # Train model
    print("\033[1m\033[95mTraining model...\033[0m")
    best_model, best_model_checkpoint_metrics, test_results = analysis.train_model(train_loader, val_loader, test_loader)
    print(f"\033[1m\033[96mBEST MODEL VAL LOSS:\033[0m {best_model_checkpoint_metrics['val_loss']:.5f}")
    print(f"\033[1m\033[96mTEST RESULTS:\033[0m loss = {test_results[0]['test_loss']:.5f}")

    # Evaluate and visualize
    print("\033[1m\033[95mEvaluating model on test data...\033[0m")
    test_eval_data = analysis.evaluate_model(best_model, test_loader)
    plot_all_subjects_combined(test_eval_data).show()
    print("="*80)
================================================================================ >>> CONDITION: vary_B, DIFFERENCE VALUE: 1.0, SNR: 20, NOISE_VAR: 0.005 <<< -------------------------------------------------------------------------------- TRAINING SET: B values in [1.5, 2.5, 3.5] TEST SET: B values in [0.5, 4.5] --------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_B_difference_1.0 at: http://localhost:8093/#/experiments/91/runs/2a4ce703a3864f3e8acd8696dfc75676 🧪 View experiment at: http://localhost:8093/#/experiments/91 BEST MODEL VAL LOSS: 0.01320 TEST RESULTS: loss = 0.02002 Evaluating model on test data...
================================================================================ ================================================================================ >>> CONDITION: vary_B, DIFFERENCE VALUE: 1.0, SNR: 10, NOISE_VAR: 0.049999999999999996 <<< -------------------------------------------------------------------------------- TRAINING SET: B values in [1.5, 2.5, 3.5] TEST SET: B values in [0.5, 4.5] --------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_B_difference_1.0 at: http://localhost:8093/#/experiments/91/runs/ae68441241d045db8d415677ee7f2d3c 🧪 View experiment at: http://localhost:8093/#/experiments/91 BEST MODEL VAL LOSS: 0.07863 TEST RESULTS: loss = 0.08985 Evaluating model on test data...
================================================================================ ================================================================================ >>> CONDITION: vary_B, DIFFERENCE VALUE: 1.0, SNR: 5, NOISE_VAR: 0.15811388300841894 <<< -------------------------------------------------------------------------------- TRAINING SET: B values in [1.5, 2.5, 3.5] TEST SET: B values in [0.5, 4.5] --------------------------------------------------------------------------------
Training model...
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_B_difference_1.0 at: http://localhost:8093/#/experiments/91/runs/d0e5368116b94341a516ce09f4391e04 🧪 View experiment at: http://localhost:8093/#/experiments/91 BEST MODEL VAL LOSS: 0.23936 TEST RESULTS: loss = 0.21749 Evaluating model on test data...
================================================================================