Vary C¶

This notebook runs the `vary_C` condition of the LSTM time series study: for each SNR level it loads the corresponding train/test data, trains an LSTM model, and evaluates it on held-out parameter values. The workflow covers:

  • Data loading and preprocessing
  • Model configuration and training
  • Evaluation and visualization
In [1]:
import pandas as pd
import os, sys
from pathlib import Path

# Setup paths
# NOTE(review): hardcoded absolute path — this notebook only runs on this
# machine/user; consider an env var or a path relative to the notebook.
lstm_exp = Path("/home/ytli/research/lstm")
sys.path.append(str(lstm_exp))

# Import custom modules
from modules.plot import plot_all_subjects_combined, plot_time_series_by_subject
from modules.study import TimeSeriesAnalysis
from modules.data import snr_db_to_noise_std


# Silence per-run Lightning device banners and MLflow info chatter so the
# training loop's own progress prints stay readable.
import logging
logging.getLogger("pytorch_lightning.utilities.rank_zero").setLevel(logging.WARNING)
logging.getLogger("pytorch_lightning.accelerators.cuda").setLevel(logging.WARNING)
logging.getLogger("mlflow").setLevel(logging.ERROR)

Train¶

In [2]:
# Load the experiment grid (one row per condition x SNR) and display it.
conditions_noise_df = pd.read_csv(lstm_exp / 'conditions_noise.csv')
conditions_noise_df
Out[2]:
Condition Difference SNR A (train) B (train) C (train) D (train) E (test) F (test) G (test) H (test)
0 vary_A 1.0 20 [2.0, 3.0, 4.0] 1 0 0 [1.0, 5.0] 1 0 0
1 vary_B 1.0 20 1 [1.5, 2.5, 3.5] 0 0 1 [0.5, 4.5] 0 0
2 vary_C 0.5 20 1 1 [-0.5, 0.0, 0.5] 0 1 1 [-1.0, 1.0] 0
3 vary_D 2.0 20 1 1 0 [-2.0, 0.0, 2.0] 1 1 0 [-4.0, 4.0]
4 vary_A 1.0 10 [2.0, 3.0, 4.0] 1 0 0 [1.0, 5.0] 1 0 0
5 vary_B 1.0 10 1 [1.5, 2.5, 3.5] 0 0 1 [0.5, 4.5] 0 0
6 vary_C 0.5 10 1 1 [-0.5, 0.0, 0.5] 0 1 1 [-1.0, 1.0] 0
7 vary_D 2.0 10 1 1 0 [-2.0, 0.0, 2.0] 1 1 0 [-4.0, 4.0]
8 vary_A 1.0 5 [2.0, 3.0, 4.0] 1 0 0 [1.0, 5.0] 1 0 0
9 vary_B 1.0 5 1 [1.5, 2.5, 3.5] 0 0 1 [0.5, 4.5] 0 0
10 vary_C 0.5 5 1 1 [-0.5, 0.0, 0.5] 0 1 1 [-1.0, 1.0] 0
11 vary_D 2.0 5 1 1 0 [-2.0, 0.0, 2.0] 1 1 0 [-4.0, 4.0]
In [3]:
study_name = "study4"
folder_name = "vary_C"

# Each condition varies exactly one parameter; map it to the (train, test)
# columns holding the varied values. Replaces four copy-pasted if/elif
# branches that differed only in column names and the parameter letter.
varied_columns = {
    "vary_A": ("A (train)", "E (test)"),
    "vary_B": ("B (train)", "F (test)"),
    "vary_C": ("C (train)", "G (test)"),
    "vary_D": ("D (train)", "H (test)"),
}

# Run one full train/evaluate cycle per SNR level of the chosen condition.
sub_condition_df = conditions_noise_df[conditions_noise_df['Condition'] == folder_name]
for index, row in sub_condition_df.iterrows():
    difference_value = row['Difference']
    snr = row['SNR']
    # NOTE(review): for vary_A rows, 'A (train)' holds a list-like string
    # (e.g. "[2.0, 3.0, 4.0]") and float() would raise ValueError; this only
    # works here because vary_C rows store a scalar ("1"). Confirm before
    # reusing this cell for the vary_A condition.
    A = float(row['A (train)'])
    noise_std = snr_db_to_noise_std(snr, A)

    print("="*80)
    print(f"\033[1m\033[94m>>> CONDITION: {row['Condition']}, DIFFERENCE VALUE: {difference_value}, SNR: {snr}, NOISE_VAR: {noise_std**2} <<<\033[0m")
    print("-"*80)

    # Report which parameter values the model is trained vs. tested on.
    condition = row['Condition']
    if condition in varied_columns:
        train_col, test_col = varied_columns[condition]
        param = condition.split("_")[1]  # e.g. "C" from "vary_C"
        trained_on = row[train_col]
        test_on = row[test_col]
        print(f"\033[1m\033[92mTRAINING SET:\033[0m {param} values in {trained_on}")
        print(f"\033[1m\033[93mTEST SET:\033[0m {param} values in {test_on}")
    print("-"*80)

    # Initialize the analysis for this (condition, difference, SNR) setting.
    analysis = TimeSeriesAnalysis(experiment_name="test3", study_name=study_name, folder_name=folder_name, difference_value=difference_value, snr=snr)

    # Load train and test splits, then visualize all subjects together.
    df, subject_info_df = analysis.load_data()
    test_df, test_subject_info_df = analysis.load_test_data()

    all_df = pd.concat([df, test_df], ignore_index=True)
    all_subject_info_df = pd.concat([subject_info_df, test_subject_info_df], ignore_index=True)
    plot_time_series_by_subject(all_df, all_subject_info_df).show()

    # Build windowed datasets; the third loader from prepare_datasets is
    # deliberately unused — the held-out test loader comes from the separate
    # test split via prepare_test_datasets.
    sliding_windows_dict = analysis.create_sliding_windows(df)
    sliding_windows_dict_test = analysis.create_sliding_windows(test_df)
    train_loader, val_loader, _not_used_test_loader = analysis.prepare_datasets(sliding_windows_dict)
    test_loader = analysis.prepare_test_datasets(sliding_windows_dict_test)

    # Train model and report checkpoint/test losses.
    print("\033[1m\033[95mTraining model...\033[0m")
    best_model, best_model_checkpoint_metrics, test_results = analysis.train_model(train_loader, val_loader, test_loader)
    print(f"\033[1m\033[96mBEST MODEL VAL LOSS:\033[0m {best_model_checkpoint_metrics['val_loss']:.5f}")
    print(f"\033[1m\033[96mTEST RESULTS:\033[0m loss = {test_results[0]['test_loss']:.5f}")

    # Evaluate and visualize predictions on the held-out parameter values.
    print("\033[1m\033[95mEvaluating model on test data...\033[0m")
    test_eval_data = analysis.evaluate_model(best_model, test_loader)
    plot_all_subjects_combined(test_eval_data).show()
    print("="*80)
================================================================================
>>> CONDITION: vary_C, DIFFERENCE VALUE: 0.5, SNR: 20, NOISE_VAR: 0.005 <<<
--------------------------------------------------------------------------------
TRAINING SET: C values in [-0.5, 0.0, 0.5]
TEST SET: C values in [-1.0, 1.0]
--------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_C_difference_0.5 at: http://localhost:8093/#/experiments/91/runs/d551b2bf91224e4c9ada5f8d04f07e0a
🧪 View experiment at: http://localhost:8093/#/experiments/91
BEST MODEL VAL LOSS: 0.01092
TEST RESULTS: loss = 0.00889
Evaluating model on test data...
================================================================================
================================================================================
>>> CONDITION: vary_C, DIFFERENCE VALUE: 0.5, SNR: 10, NOISE_VAR: 0.049999999999999996 <<<
--------------------------------------------------------------------------------
TRAINING SET: C values in [-0.5, 0.0, 0.5]
TEST SET: C values in [-1.0, 1.0]
--------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_C_difference_0.5 at: http://localhost:8093/#/experiments/91/runs/3b3da3314c3f42dbbe8347614b9eaee5
🧪 View experiment at: http://localhost:8093/#/experiments/91
BEST MODEL VAL LOSS: 0.07044
TEST RESULTS: loss = 0.07487
Evaluating model on test data...
================================================================================
================================================================================
>>> CONDITION: vary_C, DIFFERENCE VALUE: 0.5, SNR: 5, NOISE_VAR: 0.15811388300841894 <<<
--------------------------------------------------------------------------------
TRAINING SET: C values in [-0.5, 0.0, 0.5]
TEST SET: C values in [-1.0, 1.0]
--------------------------------------------------------------------------------
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training model...
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
🏃 View run vary_C_difference_0.5 at: http://localhost:8093/#/experiments/91/runs/782527c2966b40cf8520c685df0c635b
🧪 View experiment at: http://localhost:8093/#/experiments/91
BEST MODEL VAL LOSS: 0.21017
TEST RESULTS: loss = 0.20039
Evaluating model on test data...
================================================================================