Vary A¶
This notebook provides a structured framework for time series analysis using LSTM models. It includes functions for:
- Data loading and preprocessing
- Model configuration and training
- Evaluation and visualization
In [1]:
import pandas as pd
import os, sys
from pathlib import Path
# Setup paths
# NOTE(review): hardcoded absolute path — breaks on other machines; consider an
# environment variable or a path relative to the notebook. TODO confirm intent.
lstm_exp = Path("/home/ytli/research/lstm")
# Make the project's `modules` package importable from this notebook.
sys.path.append(str(lstm_exp))
# Import custom modules
from modules.plot import plot_all_subjects_combined, plot_time_series_by_subject
from modules.study import TimeSeriesAnalysis
from modules.data import snr_db_to_noise_std
import logging
# Quiet down noisy third-party loggers so the training loop's own prints
# (conditions, losses) remain readable in cell output.
logging.getLogger("pytorch_lightning.utilities.rank_zero").setLevel(logging.WARNING)
logging.getLogger("pytorch_lightning.accelerators.cuda").setLevel(logging.WARNING)
logging.getLogger("mlflow").setLevel(logging.ERROR)
Train¶
In [2]:
# Load the experiment conditions table (one row per condition x SNR level)
# and display it for inspection.
conditions_noise_df = pd.read_csv(lstm_exp / 'conditions_noise.csv')
conditions_noise_df
Out[2]:
| | Condition | Difference | SNR | A (train) | B (train) | C (train) | D (train) | E (test) | F (test) | G (test) | H (test) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | vary_A | 1.0 | 20 | [2.0, 3.0, 4.0] | 1 | 0 | 0 | [1.0, 5.0] | 1 | 0 | 0 |
| 1 | vary_B | 1.0 | 20 | 1 | [1.5, 2.5, 3.5] | 0 | 0 | 1 | [0.5, 4.5] | 0 | 0 |
| 2 | vary_C | 0.5 | 20 | 1 | 1 | [-0.5, 0.0, 0.5] | 0 | 1 | 1 | [-1.0, 1.0] | 0 |
| 3 | vary_D | 2.0 | 20 | 1 | 1 | 0 | [-2.0, 0.0, 2.0] | 1 | 1 | 0 | [-4.0, 4.0] |
| 4 | vary_A | 1.0 | 10 | [2.0, 3.0, 4.0] | 1 | 0 | 0 | [1.0, 5.0] | 1 | 0 | 0 |
| 5 | vary_B | 1.0 | 10 | 1 | [1.5, 2.5, 3.5] | 0 | 0 | 1 | [0.5, 4.5] | 0 | 0 |
| 6 | vary_C | 0.5 | 10 | 1 | 1 | [-0.5, 0.0, 0.5] | 0 | 1 | 1 | [-1.0, 1.0] | 0 |
| 7 | vary_D | 2.0 | 10 | 1 | 1 | 0 | [-2.0, 0.0, 2.0] | 1 | 1 | 0 | [-4.0, 4.0] |
| 8 | vary_A | 1.0 | 5 | [2.0, 3.0, 4.0] | 1 | 0 | 0 | [1.0, 5.0] | 1 | 0 | 0 |
| 9 | vary_B | 1.0 | 5 | 1 | [1.5, 2.5, 3.5] | 0 | 0 | 1 | [0.5, 4.5] | 0 | 0 |
| 10 | vary_C | 0.5 | 5 | 1 | 1 | [-0.5, 0.0, 0.5] | 0 | 1 | 1 | [-1.0, 1.0] | 0 |
| 11 | vary_D | 2.0 | 5 | 1 | 1 | 0 | [-2.0, 0.0, 2.0] | 1 | 1 | 0 | [-4.0, 4.0] |
In [3]:
study_name = "study4"
folder_name = "vary_A"

# Which conditions-table columns hold the train/test parameter values for each
# condition. Replaces four copy-pasted if/elif branches that differed only in
# the column letter (A->E, B->F, C->G, D->H).
CONDITION_COLUMNS = {
    "vary_A": ("A (train)", "E (test)"),
    "vary_B": ("B (train)", "F (test)"),
    "vary_C": ("C (train)", "G (test)"),
    "vary_D": ("D (train)", "H (test)"),
}

# Restrict to the rows for this notebook's condition (one row per SNR level).
sub_condition_df = conditions_noise_df[conditions_noise_df['Condition'] == folder_name]
for index, row in sub_condition_df.iterrows():
    difference_value = row['Difference']
    snr = row['SNR']
    condition = row['Condition']
    print("="*80)
    print(f"\033[1m\033[94m>>> CONDITION: {condition}, DIFFERENCE VALUE: {difference_value}, SNR: {snr} <<<\033[0m")
    print("-"*80)
    # Report which parameter is varied and its train/test value sets.
    if condition in CONDITION_COLUMNS:
        train_col, test_col = CONDITION_COLUMNS[condition]
        varied_param = condition.split('_', 1)[1]  # e.g. "vary_A" -> "A"
        trained_on = row[train_col]
        test_on = row[test_col]
        print(f"\033[1m\033[92mTRAINING SET:\033[0m {varied_param} values in {trained_on}")
        print(f"\033[1m\033[93mTEST SET:\033[0m {varied_param} values in {test_on}")
    print("-"*80)
    # Initialize the analysis for this condition/SNR combination.
    analysis = TimeSeriesAnalysis(experiment_name="test3", study_name=study_name, folder_name=folder_name, difference_value=difference_value, snr=snr)
    # Load train and test data, then plot all subjects together for a visual
    # sanity check before training.
    df, subject_info_df = analysis.load_data()
    test_df, test_subject_info_df = analysis.load_test_data()
    all_df = pd.concat([df, test_df], ignore_index=True)
    all_subject_info_df = pd.concat([subject_info_df, test_subject_info_df], ignore_index=True)
    plot_time_series_by_subject(all_df, all_subject_info_df).show()
    # Build sliding windows and data loaders. The test loader returned by
    # prepare_datasets is intentionally discarded; the real test loader comes
    # from the held-out test split via prepare_test_datasets.
    sliding_windows_dict = analysis.create_sliding_windows(df)
    sliding_windows_dict_test = analysis.create_sliding_windows(test_df)
    train_loader, val_loader, _not_used_test_loader = analysis.prepare_datasets(sliding_windows_dict)
    test_loader = analysis.prepare_test_datasets(sliding_windows_dict_test)
    # Train model
    print("\033[1m\033[95mTraining model...\033[0m")
    best_model, best_model_checkpoint_metrics, test_results = analysis.train_model(train_loader, val_loader, test_loader)
    print(f"\033[1m\033[96mBEST MODEL VAL LOSS:\033[0m {best_model_checkpoint_metrics['val_loss']:.5f}")
    print(f"\033[1m\033[96mTEST RESULTS:\033[0m loss = {test_results[0]['test_loss']:.5f}")
    # Evaluate and visualize
    print("\033[1m\033[95mEvaluating model on test data...\033[0m")
    test_eval_data = analysis.evaluate_model(best_model, test_loader)
    plot_all_subjects_combined(test_eval_data).show()
    print("="*80)
================================================================================ >>> CONDITION: vary_A, DIFFERENCE VALUE: 1.0, SNR: 20 <<< -------------------------------------------------------------------------------- TRAINING SET: A values in [2.0, 3.0, 4.0] TEST SET: A values in [1.0, 5.0] --------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_A_difference_1.0 at: http://localhost:8093/#/experiments/91/runs/b03440ba56e04746b79a55cf559dca27 🧪 View experiment at: http://localhost:8093/#/experiments/91 BEST MODEL VAL LOSS: 0.09479 TEST RESULTS: loss = 0.14847 Evaluating model on test data...
================================================================================ ================================================================================ >>> CONDITION: vary_A, DIFFERENCE VALUE: 1.0, SNR: 10 <<< -------------------------------------------------------------------------------- TRAINING SET: A values in [2.0, 3.0, 4.0] TEST SET: A values in [1.0, 5.0] --------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_A_difference_1.0 at: http://localhost:8093/#/experiments/91/runs/a8de148a485a4411b3b0e839fc2cc843 🧪 View experiment at: http://localhost:8093/#/experiments/91 BEST MODEL VAL LOSS: 0.69354 TEST RESULTS: loss = 0.76542 Evaluating model on test data...
================================================================================ ================================================================================ >>> CONDITION: vary_A, DIFFERENCE VALUE: 1.0, SNR: 5 <<< -------------------------------------------------------------------------------- TRAINING SET: A values in [2.0, 3.0, 4.0] TEST SET: A values in [1.0, 5.0] --------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 0%| | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_A_difference_1.0 at: http://localhost:8093/#/experiments/91/runs/8e3bfe4b1de541caa88e7ff5d564e6b1 🧪 View experiment at: http://localhost:8093/#/experiments/91 BEST MODEL VAL LOSS: 2.04042 TEST RESULTS: loss = 2.50137 Evaluating model on test data...
================================================================================