# --------------------------------------
# Start of statistical analysis
# --------------------------------------

# Import the required libraries
import matplotlib.pyplot as plt
import statsmodels as Stats
import pandas as pd
import numpy as np
from scipy.stats import t

# --------------------------------------
# Load the dataset
# --------------------------------------

# Read CSV file
qvi_data = pd.read_csv("C:\\Users\\rageg\\Downloads\\QVI_data.csv")

# Check the initial structure
qvi_data.info()

# Convert DATE column to datetime
qvi_data["DATE"] = pd.to_datetime(qvi_data["DATE"])

# Check structure after conversion
qvi_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263214 entries, 0 to 263213
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   LYLTY_CARD_NBR    263214 non-null  int64  
 1   DATE              263214 non-null  object 
 2   STORE_NBR         263214 non-null  int64  
 3   TXN_ID            263214 non-null  int64  
 4   PROD_NBR          263214 non-null  int64  
 5   PROD_NAME         263214 non-null  object 
 6   PROD_QTY          263214 non-null  int64  
 7   TOT_SALES         263214 non-null  float64
 8   PACK_SIZE         263214 non-null  int64  
 9   BRAND             263214 non-null  object 
 10  LIFESTAGE         263214 non-null  object 
 11  PREMIUM_CUSTOMER  263214 non-null  object 
dtypes: float64(1), int64(6), object(5)
memory usage: 24.1+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263214 entries, 0 to 263213
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   LYLTY_CARD_NBR    263214 non-null  int64         
 1   DATE              263214 non-null  datetime64[ns]
 2   STORE_NBR         263214 non-null  int64         
 3   TXN_ID            263214 non-null  int64         
 4   PROD_NBR          263214 non-null  int64         
 5   PROD_NAME         263214 non-null  object        
 6   PROD_QTY          263214 non-null  int64         
 7   TOT_SALES         263214 non-null  float64       
 8   PACK_SIZE         263214 non-null  int64         
 9   BRAND             263214 non-null  object        
 10  LIFESTAGE         263214 non-null  object        
 11  PREMIUM_CUSTOMER  263214 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(6), object(4)
memory usage: 24.1+ MB

# --------------------------------------
# Create monthly identifiers and unit price
# --------------------------------------

# Convert DATE to MONTH_ID formatted as YYYYMM
qvi_data["MONTH_ID"] = qvi_data["DATE"].dt.strftime("%Y%m").astype(int)

# Compute unit price
qvi_data["UNIT_PRICE"] = qvi_data["TOT_SALES"] / qvi_data["PROD_QTY"]

qvi_data


# --------------------------------------
# Group by store and month to create monthly metrics
# --------------------------------------

qvi_grouped = qvi_data.groupby(
    ["STORE_NBR", "MONTH_ID"]
).agg(
    TOTAL_SALES=("TOT_SALES", "sum"),
    CUSTOMERS=("LYLTY_CARD_NBR", "nunique"),
    TRANSACTIONS=("TXN_ID", "nunique"),
    AVG_PRICE_PER_UNIT=("UNIT_PRICE", "mean"),
    CHIPS_SOLD=("PROD_QTY", "sum")
).reset_index()

# Compute additional derived metrics
qvi_grouped["CHIPS_PER_CUSTOMER"] = qvi_grouped["CHIPS_SOLD"] / qvi_grouped["CUSTOMERS"]
qvi_grouped["TXN_PER_CUSTOMER"] = qvi_grouped["TRANSACTIONS"] / qvi_grouped["CUSTOMERS"]

# --------------------------------------
# Filter to pre-trial period and full observation stores
# --------------------------------------

# Copy grouped data
df_full = qvi_grouped.copy()

# Keep pre-trial months only
df_pre = df_full[df_full["MONTH_ID"] < 201902]

# Count months per store
full_obs_counts = df_full["STORE_NBR"].value_counts()

# Identify stores with all twelve required observations
stores_with_12 = full_obs_counts[full_obs_counts == 12].index

# Keep only pre-trial months for stores with full observations
qvi_grouped_full_obs = df_pre[df_pre["STORE_NBR"].isin(stores_with_12)]

# --------------------------------------
# Define trial stores and metrics
# --------------------------------------

trials = [77, 86, 88]

# Metrics used for correlation and magnitude distance
metrics = ["TOTAL_SALES", "CUSTOMERS"]

# Identify control stores
controls = [
    store for store in qvi_grouped_full_obs["STORE_NBR"].unique()
    if store not in trials
]


# --------------------------------------
# Magnitude distance function
# --------------------------------------

def mag_dist(df, trial_stores, mets):

    # Split trial and control data
    trials_df = df[df["STORE_NBR"].isin(trial_stores)]
    controls_df = df[~df["STORE_NBR"].isin(trial_stores)]
    
    # Merge on month to compare trial and control stores
    new_df = pd.merge(
        controls_df,
        trials_df,
        on="MONTH_ID",
        suffixes=("_CNTRL", "_TRIAL")
    )
    
    # Initialize comparison table
    comparison_df = new_df[["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"]].copy()
    
    # Compute absolute distance for each metric
    for metric in mets:
        comparison_df[metric + "_abs_dist"] = (
            new_df[metric + "_CNTRL"] - new_df[metric + "_TRIAL"]
        ).abs()
    
    # Standardize distances using min-max scaling (flipped so higher is more similar)
    for metric in mets:
        col = metric + "_abs_dist"
        min_val = comparison_df[col].min()
        max_val = comparison_df[col].max()
        
        if max_val > min_val:
            scaled = (comparison_df[col] - min_val) / (max_val - min_val)
            flipped = 1 - scaled
            comparison_df[col.replace("_abs_dist", "_std_dist")] = flipped
        else:
            comparison_df[col.replace("_abs_dist", "_std_dist")] = 1.0
    
    # Select standardized distance columns
    std_list = [metric + "_std_dist" for metric in mets]
    std_dist = comparison_df[
        ["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"] + std_list
    ]
    
    # Average similarity across months
    mean_distances = (
        std_dist.groupby(["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"])
        .mean()
        .reset_index()
    )
    
    return mean_distances


# --------------------------------------
# Correlation function
# --------------------------------------

def p_corr_wide(df, trial_stores, mets):

    # Identify control stores
    controls = [
        store for store in df["STORE_NBR"].unique()
        if store not in trial_stores
    ]
    
    final_df = None

    for metric in mets:

        # Pivot to compute correlations across stores
        pivoted = df.pivot(index="MONTH_ID", columns="STORE_NBR", values=metric)
        
        # Store-to-store correlations
        corr_df = pivoted.corr().loc[controls, trial_stores]
        
        # Convert correlation matrix to long format
        corr_long = corr_df.reset_index().melt(
            id_vars="STORE_NBR",
            var_name="STORE_NBR_TRIAL",
            value_name=f"{metric}_corr"
        ).rename(columns={"STORE_NBR": "STORE_NBR_CNTRL"})
        
        # Merge correlations for all metrics
        if final_df is None:
            final_df = corr_long
        else:
            final_df = final_df.merge(
                corr_long,
                on=["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"],
                how="inner"
            )

    return final_df

# --------------------------------------
# Prepare trial store identifiers
# --------------------------------------

# Copy grouped data
df = qvi_grouped.copy()

# Define trial store numbers
trial_store77 = 77
trial_store86 = 86
trial_store88 = 88

# --------------------------------------
# Compute correlation and magnitude distance
# --------------------------------------

# Correlation scores between control and trial stores
store_corr = p_corr_wide(qvi_grouped_full_obs, trials, metrics)

# Magnitude distance scores between control and trial stores
store_mag = mag_dist(qvi_grouped_full_obs, trials, metrics)

store_mag

# --------------------------------------
# Select the best control store for a trial store
# --------------------------------------

def select_control_store(store_corr, store_mag, trial_store, corr_weight=0.5, mag_weight=0.5):
    """
    Compute the best control store match for a given trial store.
    store_corr is the correlation table from p_corr_wide
    store_mag is the magnitude distance table from mag_dist
    trial_store is the store being evaluated
    corr_weight controls how much correlation contributes to the score
    mag_weight controls how much magnitude distance contributes to the score
    Returns the best control store and the table of all scores
    """

    # --------------------------------------
    # Combine correlation and magnitude for TOTAL_SALES
    # --------------------------------------
    
    corr_sales = store_corr[store_corr["STORE_NBR_TRIAL"] == trial_store][
        ["STORE_NBR_CNTRL", "STORE_NBR_TRIAL", "TOTAL_SALES_corr"]
    ]

    mag_sales = store_mag[store_mag["STORE_NBR_TRIAL"] == trial_store][
        ["STORE_NBR_CNTRL", "STORE_NBR_TRIAL", "TOTAL_SALES_std_dist"]
    ]

    sales_df = corr_sales.merge(mag_sales, on=["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"])
    sales_df["Final_score_sales"] = (
        sales_df["TOTAL_SALES_corr"] * corr_weight +
        sales_df["TOTAL_SALES_std_dist"] * mag_weight
    )

    # --------------------------------------
    # Combine correlation and magnitude for CUSTOMERS
    # --------------------------------------
    
    corr_cust = store_corr[store_corr["STORE_NBR_TRIAL"] == trial_store][
        ["STORE_NBR_CNTRL", "STORE_NBR_TRIAL", "CUSTOMERS_corr"]
    ]

    mag_cust = store_mag[store_mag["STORE_NBR_TRIAL"] == trial_store][
        ["STORE_NBR_CNTRL", "STORE_NBR_TRIAL", "CUSTOMERS_std_dist"]
    ]

    custo_df = corr_cust.merge(mag_cust, on=["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"])
    custo_df["Final_score_custo"] = (
        custo_df["CUSTOMERS_corr"] * corr_weight +
        custo_df["CUSTOMERS_std_dist"] * mag_weight
    )

    # --------------------------------------
    # Merge both drivers into one table
    # --------------------------------------
    
    final_df = sales_df.merge(
        custo_df,
        on=["STORE_NBR_CNTRL", "STORE_NBR_TRIAL"]
    )[[
        "STORE_NBR_CNTRL",
        "STORE_NBR_TRIAL",
        "Final_score_sales",
        "Final_score_custo"
    ]]

    # --------------------------------------
    # Compute composite score
    # --------------------------------------
    
    final_df["SCORE"] = (
        final_df["Final_score_sales"] * 0.5 +
        final_df["Final_score_custo"] * 0.5
    )

    # Sort stores by composite score
    final_df = final_df.sort_values("SCORE", ascending=False)

    # Select the highest ranked control store
    best_control = int(final_df.iloc[0]["STORE_NBR_CNTRL"])

    return best_control, final_df

# --------------------------------------
# Plot pre-trial sales and customers
# --------------------------------------

def plot_pretrial_sales_and_customers(df, trial_store, control_store):
    """
    Create side-by-side panels comparing pre-trial sales and customers
    for a trial store versus its matched control store.
    """

    temp = df.copy()

    # Label store types
    temp["Store_type"] = temp["STORE_NBR"].apply(
        lambda x: "Trial" if x == trial_store
        else "Control" if x == control_store
        else "Other stores"
    )

    # --------------------------------------
    # Convert MONTH_ID to a datetime month
    # --------------------------------------
    def add_month_date(x):
        return pd.to_datetime(
            x.astype(str).str[:4] + "-" +
            x.astype(str).str[4:] + "-01"
        )

    # --------------------------------------
    # Pre-trial sales
    # --------------------------------------
    sales = (
        temp.groupby(["MONTH_ID", "Store_type"])["TOTAL_SALES"]
            .mean()
            .reset_index()
    )
    sales["TransactionMonth"] = add_month_date(sales["MONTH_ID"])
    sales = sales[sales["MONTH_ID"] < 201903]

    # --------------------------------------
    # Pre-trial customers
    # --------------------------------------
    cust = (
        temp.groupby(["MONTH_ID", "Store_type"])["CUSTOMERS"]
            .mean()
            .reset_index(name="numberCustomers")
    )
    cust["TransactionMonth"] = add_month_date(cust["MONTH_ID"])
    cust = cust[cust["MONTH_ID"] < 201903]

    # --------------------------------------
    # Create side-by-side comparison plot
    # --------------------------------------
    fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharex=True)

    # Left panel: Sales
    for stype, sub in sales.groupby("Store_type"):
        axes[0].plot(sub["TransactionMonth"], sub["TOTAL_SALES"], label=stype, linewidth=2)
    axes[0].set_title(f"Store {trial_store} Pre-trial Sales (Control {control_store})")
    axes[0].set_xlabel("Month")
    axes[0].set_ylabel("Total Sales")
    axes[0].grid(alpha=0.25)

    # Right panel: Customers
    for stype, sub in cust.groupby("Store_type"):
        axes[1].plot(sub["TransactionMonth"], sub["numberCustomers"], label=stype, linewidth=2)
    axes[1].set_title(f"Store {trial_store} Pre-trial Customers (Control {control_store})")
    axes[1].set_xlabel("Month")
    axes[1].set_ylabel("Total Customers")
    axes[1].grid(alpha=0.25)

    # Shared legend
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="upper center", ncol=3)

    fig.tight_layout(rect=[0, 0, 1, 0.93])
    plt.show()

# --------------------------------------
# Select control stores for each trial store
# --------------------------------------

control_store77 = select_control_store(store_corr, store_mag, trial_store77)[0]
control_store86 = select_control_store(store_corr, store_mag, trial_store86)[0]
control_store88 = select_control_store(store_corr, store_mag, trial_store88)[0]

# --------------------------------------
# Plot pre-trial comparisons for all trial stores
# --------------------------------------

plot_pretrial_sales_and_customers(qvi_grouped, trial_store77, control_store77)
plot_pretrial_sales_and_customers(qvi_grouped, trial_store86, control_store86)
plot_pretrial_sales_and_customers(qvi_grouped, trial_store88, control_store88)

# --------------------------------------
# Scaling function for pre-trial metrics
# --------------------------------------

metric_cols = ["TOTAL_SALES", "CUSTOMERS"]

def scale_pretrial_metric(df, trial_store, control_store, metric_col, cutoff=201902):

    df_copy = df.copy()

    # Filter to pre-trial months
    full_pre = df_copy[df_copy["MONTH_ID"] < cutoff]

    # Compute total values for scaling
    trial_pre_total = full_pre[full_pre["STORE_NBR"] == trial_store][metric_col].sum()
    control_pre_total = full_pre[full_pre["STORE_NBR"] == control_store][metric_col].sum()

    # Compute scaling factor, avoid division by zero
    if control_pre_total == 0:
        scaling_factor = np.nan
    else:
        scaling_factor = trial_pre_total / control_pre_total

    # Name of the scaled column
    new_col = f"scaled_{metric_col}"

    # Apply scaling to the control store only
    df_copy[new_col] = np.where(
        df_copy["STORE_NBR"] == control_store,
        df_copy[metric_col] * scaling_factor,
        np.nan
    )

    return df_copy, scaling_factor


# --------------------------------------
# Compute percentage difference and t-values
# --------------------------------------

def compute_percentage_diff_and_tvalues(df_scaled, trial_store, control_store, metric,
                                        pre_cutoff=201902,
                                        trial_start=201902,
                                        trial_end=201904):
    """
    Compute percentage difference between trial and scaled control,
    compute pre-trial standard deviation,
    compute t-values,
    compute critical t-value for significance.
    """

    scaled_col = f"scaled_{metric}"

    # Extract scaled control values
    control_scaled = df_scaled[df_scaled["STORE_NBR"] == control_store][
        ["MONTH_ID", scaled_col]
    ].rename(columns={scaled_col: "scaledControl"})

    # Extract trial values
    trial_vals = df_scaled[df_scaled["STORE_NBR"] == trial_store][
        ["MONTH_ID", metric]
    ].rename(columns={metric: "trialValue"})

    # Merge trial and control values
    percentageDiff = control_scaled.merge(trial_vals, on="MONTH_ID")

    # Compute percentage difference
    percentageDiff["percentageDiff"] = (
        (percentageDiff["trialValue"] - percentageDiff["scaledControl"]).abs()
        / percentageDiff["scaledControl"]
    )

    # Compute pre-trial standard deviation
    stdDev = percentageDiff[percentageDiff["MONTH_ID"] < pre_cutoff][
        "percentageDiff"
    ].std()

    # Compute t-values
    percentageDiff["tValue"] = percentageDiff["percentageDiff"] / stdDev

    # Convert MONTH_ID to datetime
    percentageDiff["TransactionMonth"] = pd.to_datetime(
        percentageDiff["MONTH_ID"].astype(str) + "01",
        format="%Y%m%d"
    )

    # Extract trial-period t-values
    trial_tvalues = percentageDiff[
        (percentageDiff["MONTH_ID"] >= trial_start) &
        (percentageDiff["MONTH_ID"] <= trial_end)
    ][["TransactionMonth", "tValue"]]

    # Compute critical t-value (df = 7)
    degrees_of_freedom = 7
    critical_t = t.ppf(0.95, df=degrees_of_freedom)

    return trial_tvalues, percentageDiff, stdDev, critical_t

# --------------------------------------
# Scale sales and customer metrics for each trial store
# --------------------------------------

# Scaled sales data
scaled_sales_store77 = scale_pretrial_metric(df_full, trial_store77, control_store77, metric_cols[0])[0]
scaled_sales_store86 = scale_pretrial_metric(df_full, trial_store86, control_store86, metric_cols[0])[0]
scaled_sales_store88 = scale_pretrial_metric(df_full, trial_store88, control_store88, metric_cols[0])[0]

# Scaled customer data
scaled_cust_store77 = scale_pretrial_metric(df_full, trial_store77, control_store77, metric_cols[1])[0]
scaled_cust_store86 = scale_pretrial_metric(df_full, trial_store86, control_store86, metric_cols[1])[0]
scaled_cust_store88 = scale_pretrial_metric(df_full, trial_store88, control_store88, metric_cols[1])[0]

# --------------------------------------
# Compute percentage differences and t-values for Store 77
# --------------------------------------

tvals_77_sales, diff_77_sales, sd_77_sales, crit_77_sales = compute_percentage_diff_and_tvalues(
    scaled_sales_store77,
    trial_store77,
    control_store77,
    metric_cols[0]
)

tvals_77_cust, diff_77_cust, sd_77_cust, crit_77_cust = compute_percentage_diff_and_tvalues(
    scaled_cust_store77,
    trial_store77,
    control_store77,
    metric_cols[1]
)


# --------------------------------------
# Compute percentage differences and t-values for Store 86
# --------------------------------------

tvals_86_sales, diff_86_sales, sd_86_sales, crit_86_sales = compute_percentage_diff_and_tvalues(
    scaled_sales_store86,
    trial_store86,
    control_store86,
    metric_cols[0]
)

tvals_86_cust, diff_86_cust, sd_86_cust, crit_86_cust = compute_percentage_diff_and_tvalues(
    scaled_cust_store86,
    trial_store86,
    control_store86,
    metric_cols[1]
)


# --------------------------------------
# Compute percentage differences and t-values for Store 88
# --------------------------------------

tvals_88_sales, diff_88_sales, sd_88_sales, crit_88_sales = compute_percentage_diff_and_tvalues(
    scaled_sales_store88,
    trial_store88,
    control_store88,
    metric_cols[0]
)

tvals_88_cust, diff_88_cust, sd_88_cust, crit_88_cust = compute_percentage_diff_and_tvalues(
    scaled_cust_store88,
    trial_store88,
    control_store88,
    metric_cols[1]
)

# Display difference table for Store 77 sales
diff_77_sales

# --------------------------------------
# Highlight trial period rows in styled tables
# --------------------------------------

def highlight_trial_rows(row):
    month = row["MONTH_ID"]
    if 201902 <= month <= 201904:
        return ["background-color: gray"] * len(row)
    else:
        return [""] * len(row)

(
    diff_77_cust
    .style
    .apply(highlight_trial_rows, axis=1)
    .set_caption("Percentage Difference and T-Values for Store 77 (Customers)")
)

# --------------------------------------
# Plot uplift panels for sales and customers
# --------------------------------------

def plot_uplift_panels(df_full,
                       scaled_sales_df, scaled_cust_df,
                       trial_store, control_store,
                       sd_sales, sd_cust):
    """
    Produce a panel of two plots showing sales uplift and customer uplift
    for a trial store with corresponding 95 percent confidence intervals.
    """

    # --------------------------------------
    # Helper function to generate CI tables
    # --------------------------------------
    def build_uplift_table(df_full, scaled_df, trial_store, control_store, metric_col, stdDev):

        df_plot = df_full.copy()

        # Label store types
        df_plot["Store_type"] = df_plot["STORE_NBR"].apply(
            lambda x: "Trial" if x == trial_store
            else "Control" if x == control_store
            else "Other stores"
        )

        # Aggregate base metric
        agg = (
            df_plot.groupby(["MONTH_ID", "Store_type"])[metric_col]
            .mean()
            .reset_index(name="metric")
        )

        agg["TransactionMonth"] = pd.to_datetime(
            agg["MONTH_ID"].astype(str) + "01",
            format="%Y%m%d"
        )

        # Keep only Trial and Control lines
        agg = agg[agg["Store_type"].isin(["Trial", "Control"])]

        # Compute confidence interval bands
        control_vals = agg[agg["Store_type"] == "Control"].copy()

        upper = control_vals.copy()
        upper["metric"] = upper["metric"] * (1 + stdDev * 2)
        upper["Store_type"] = "Control 95 percent CI"

        lower = control_vals.copy()
        lower["metric"] = lower["metric"] * (1 - stdDev * 2)
        lower["Store_type"] = "Control 5 percent CI"

        final = pd.concat([agg, upper, lower])
        return final

    # --------------------------------------
    # Build uplift datasets for both metrics
    # --------------------------------------
    sales_uplift = build_uplift_table(
        df_full, scaled_sales_df, trial_store, control_store,
        "TOTAL_SALES", sd_sales
    )

    cust_uplift = build_uplift_table(
        df_full, scaled_cust_df, trial_store, control_store,
        "CUSTOMERS", sd_cust
    )

    # --------------------------------------
    # Create the figure layout
    # --------------------------------------
    fig, axes = plt.subplots(1, 2, figsize=(15, 5), sharex=True)

    shade_start = pd.to_datetime("2019-02")
    shade_end = pd.to_datetime("2019-04")

    # --------------------------------------
    # Sales uplift panel
    # --------------------------------------
    axes[0].axvspan(shade_start, shade_end, color="lightgray", alpha=0.3)

    for stype, sub in sales_uplift.groupby("Store_type"):
        axes[0].plot(sub["TransactionMonth"], sub["metric"], label=stype, linewidth=2)

    axes[0].set_title(f"Store {trial_store} Sales Uplift with Confidence Bands")
    axes[0].set_ylabel("Total Sales")
    axes[0].grid(alpha=0.25)

    # --------------------------------------
    # Customers uplift panel
    # --------------------------------------
    axes[1].axvspan(shade_start, shade_end, color="lightgray", alpha=0.3)

    for stype, sub in cust_uplift.groupby("Store_type"):
        axes[1].plot(sub["TransactionMonth"], sub["metric"], label=stype, linewidth=2)

    axes[1].set_title(f"Store {trial_store} Customer Uplift with Confidence Bands")
    axes[1].set_ylabel("Total Customers")
    axes[1].grid(alpha=0.25)

    # Shared legend
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="upper center", ncol=3)

    plt.tight_layout(rect=[0, 0, 1, 0.90])
    plt.show()

# --------------------------------------
# Plot uplift panels for each trial store
# --------------------------------------

# Store 77 uplift panels
plot_uplift_panels(
    df_full,
    scaled_sales_store77,
    scaled_cust_store77,
    trial_store77,
    control_store77,
    sd_77_sales,
    sd_77_cust
)

# Store 86 uplift panels
plot_uplift_panels(
    df_full,
    scaled_sales_store86,
    scaled_cust_store86,
    trial_store86,
    control_store86,
    sd_86_sales,
    sd_86_cust
)

# Store 88 uplift panels
plot_uplift_panels(
    df_full,
    scaled_sales_store88,
    scaled_cust_store88,
    trial_store88,
    control_store88,
    sd_88_sales,
    sd_88_cust
)

# --------------------------------------
# Plot percentage uplift for trial months
# --------------------------------------

def plot_uplift(df, trial_store, control_store, metric, title_suffix="Sales"):
    """
    Plot percentage uplift for the trial period based on:
    uplift = (trial - scaled_control) / scaled_control
    """

    # Trial months used in the analysis
    trial_months = [201902, 201903, 201904]

    # Filter to trial months only
    trial_df = df[df["MONTH_ID"].isin(trial_months)]

    # Extract trial and corresponding scaled control values
    trial_vals = trial_df[trial_df["STORE_NBR"] == trial_store][metric].values
    control_vals = trial_df[trial_df["STORE_NBR"] == control_store][f"scaled_{metric}"].values

    # Compute uplift percentage
    uplift = (trial_vals - control_vals) / control_vals * 100

    # Format month labels as YYYY-MM
    month_labels = [f"{str(m)[:4]}-{str(m)[4:]}" for m in trial_months]

    # --------------------------------------
    # Create bar chart
    # --------------------------------------
    plt.figure(figsize=(8, 5))
    plt.bar(month_labels, uplift, color="#2ca02c")

    # Annotate bar values
    for i, v in enumerate(uplift):
        offset = 0.02 * v if v != 0 else 1
        plt.text(i, v + offset, f"{v:.1f} percent", ha="center", fontsize=10)

    plt.axhline(0, color="black", linewidth=1)

    # Title including trial and control stores
    plt.title(
        f"Percentage Uplift for Store {trial_store} (Control {control_store}) — {title_suffix}"
    )

    plt.xlabel("Month")
    plt.ylabel("Uplift (percent)")
    plt.tight_layout()
    plt.show()

# --------------------------------------
# Uplift plots for Store 77
# --------------------------------------

plot_uplift(
    df=scaled_sales_store77,
    trial_store=trial_store77,
    control_store=control_store77,
    metric="TOTAL_SALES",
    title_suffix="Sales"
)

plot_uplift(
    df=scaled_cust_store77,
    trial_store=trial_store77,
    control_store=control_store77,
    metric="CUSTOMERS",
    title_suffix="Customers"
)


# --------------------------------------
# Uplift plots for Store 86
# --------------------------------------

plot_uplift(
    df=scaled_sales_store86,
    trial_store=trial_store86,
    control_store=control_store86,
    metric="TOTAL_SALES",
    title_suffix="Sales"
)

plot_uplift(
    df=scaled_cust_store86,
    trial_store=trial_store86,
    control_store=control_store86,
    metric="CUSTOMERS",
    title_suffix="Customers"
)


# --------------------------------------
# Uplift plots for Store 88
# --------------------------------------

plot_uplift(
    df=scaled_sales_store88,
    trial_store=trial_store88,
    control_store=control_store88,
    metric="TOTAL_SALES",
    title_suffix="Sales"
)

plot_uplift(
    df=scaled_cust_store88,
    trial_store=trial_store88,
    control_store=control_store88,
    metric="CUSTOMERS",
    title_suffix="Customers"
)

	STORE_NBR_CNTRL	STORE_NBR_TRIAL	TOTAL_SALES_std_dist	CUSTOMERS_std_dist
0	1	77	0.957554	0.955703
1	1	86	0.520879	0.579181
2	1	88	0.190486	0.376523
3	2	77	0.942248	0.943522
4	2	86	0.494721	0.529347
...	...	...	...	...
763	271	86	0.950847	0.950166
764	271	88	0.633696	0.747508
765	272	77	0.894178	0.961240
766	272	86	0.658295	0.562569
767	272	88	0.327902	0.359911

	MONTH_ID	scaledControl	trialValue	percentageDiff	tValue	TransactionMonth
0	201807	297.565550	296.8	0.002573	0.051515	2018-07-01
1	201808	292.652187	255.5	0.126950	2.542011	2018-08-01
2	201809	233.998916	225.2	0.037602	0.752940	2018-09-01
3	201810	190.085733	204.5	0.075830	1.518406	2018-10-01
4	201811	216.597421	245.3	0.132516	2.653459	2018-11-01
5	201812	286.408121	267.3	0.066716	1.335911	2018-12-01
6	201901	181.692071	204.4	0.124980	2.502571	2019-01-01
7	201902	249.762622	235.0	0.059107	1.183534	2019-02-01
8	201903	203.802205	278.5	0.366521	7.339116	2019-03-01
9	201904	162.345704	263.5	0.623080	12.476373	2019-04-01
10	201905	352.533799	299.3	0.151003	3.023650	2019-05-01
11	201906	226.219424	264.7	0.170103	3.406093	2019-06-01

	MONTH_ID	scaledControl	trialValue	percentageDiff	tValue	TransactionMonth
0	201807	51.171141	51	0.003344	0.183352	2018-07-01 00:00:00
1	201808	48.161074	47	0.024108	1.321664	2018-08-01 00:00:00
2	201809	42.140940	42	0.003344	0.183352	2018-09-01 00:00:00
3	201810	35.117450	37	0.053607	2.938874	2018-10-01 00:00:00
4	201811	40.134228	41	0.021572	1.182622	2018-11-01 00:00:00
5	201812	47.157718	46	0.024550	1.345883	2018-12-01 00:00:00
6	201901	35.117450	35	0.003344	0.183352	2019-01-01 00:00:00
7	201902	45.151007	45	0.003344	0.183352	2019-02-01 00:00:00
8	201903	40.134228	50	0.245819	13.476388	2019-03-01 00:00:00
9	201904	30.100671	47	0.561427	30.778725	2019-04-01 00:00:00
10	201905	57.191275	55	0.038315	2.100509	2019-05-01 00:00:00
11	201906	41.137584	41	0.003344	0.183352	2019-06-01 00:00:00