#!/usr/bin/env python3
"""
SPARC Correlation Analysis
Investigates the variance in α_ISL by correlating with V_flat (mass proxy).

Hypotheses:
1. Power Law: α_ISL ∝ V_flat^β
2. Bimodal: Distinct regimes for Dwarfs (V_flat < 100 km/s) vs Giants (V_flat ≥ 100 km/s)
"""

import json
import numpy as np
from pathlib import Path

def load_results(path="/home/shri/Desktop/MATHTRUTH/cosmic_synthesis/reports/SPARC_FULL_ANALYSIS_RESULTS.json"):
    """Load the SPARC full-analysis fit results from a JSON report.

    Args:
        path: Location of the JSON report. Defaults to the report path this
            script has always read, so existing no-argument calls keep working.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.
        (``main`` iterates over it and indexes each item by string keys, so it
        is presumably a list of per-galaxy dicts -- confirm against the
        producer of the report.)

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

def load_galaxy_data(galaxy_name, data_dir="/home/shri/Desktop/MATHTRUTH/sparc_data"):
    """Load the raw rotation-curve table for one galaxy.

    Reads ``<data_dir>/<galaxy_name>_rotmod.dat`` with ``np.loadtxt``, skipping
    lines that start with ``#``.

    Args:
        galaxy_name: Galaxy identifier used as the file-name prefix.
        data_dir: Directory holding the ``*_rotmod.dat`` files. Defaults to the
            directory this script has always used.

    Returns:
        A 2-D float array with one row per data line (columns per the original
        note: [Rad, Vobs, errV, ...]), or ``None`` if the file is missing,
        unreadable, or cannot be parsed as numbers. A file with no data rows
        yields an empty array.
    """
    path = Path(data_dir) / f"{galaxy_name}_rotmod.dat"
    try:
        # ndmin=2 keeps a single-row file as shape (1, ncols); without it
        # loadtxt returns a 1-D array and column indexing `data[:, 1]` fails.
        return np.loadtxt(path, comments='#', ndmin=2)
    except (OSError, ValueError):
        # Best-effort: a missing or malformed file means "no data for this
        # galaxy"; the caller skips it. Other exception types are real bugs
        # and are allowed to propagate.
        return None

def _v_flat_proxy(v_obs):
    """Return the V_flat proxy: mean of the last 3 velocities, or the max if fewer than 3."""
    if len(v_obs) >= 3:
        return np.mean(v_obs[-3:])
    return np.max(v_obs)


def _group_stats(group):
    """Return ``(mean, var_pct)`` of ``alpha_isl`` over *group*, or ``(None, None)`` if empty.

    ``var_pct`` is the standard deviation divided by the mean, in percent.
    """
    if not group:
        return None, None
    vals = np.array([g['alpha_isl'] for g in group])
    mean = np.mean(vals)
    std = np.std(vals)
    # NOTE(review): a non-positive mean reports 0%, which then counts as
    # "PASSED" below. Kept as-is; confirm this is the intended convention.
    var_pct = (std / mean) * 100 if mean > 0 else 0
    return mean, var_pct


def _print_group(group, name):
    """Print the universality-test summary for one velocity group."""
    mean, var_pct = _group_stats(group)
    if mean is None:
        print(f"\n{name}: No galaxies")
        return

    print(f"\n{name} (N={len(group)}):")
    print(f"  Mean α_ISL: {mean:.4f}")
    print(f"  Variance:   {var_pct:.2f}%")
    if var_pct < 20:
        print("  -> PASSED universality test (<20%)")
    else:
        print("  -> FAILED universality test")


def _json_number(value):
    """Return *value* as a plain float, or ``None`` if it is missing or non-finite.

    ``json.dump`` would otherwise emit ``NaN``/``Infinity``, which are not
    valid JSON.
    """
    if value is None or not np.isfinite(value):
        return None
    return float(value)


def _group_summary(group):
    """Return the JSON-ready ``{'mean', 'var_pct'}`` entry for one velocity group."""
    mean, var_pct = _group_stats(group)
    return {'mean': _json_number(mean), 'var_pct': _json_number(var_pct)}


def main(output_path='/home/shri/Desktop/MATHTRUTH/cosmic_synthesis/reports/SPARC_CORRELATION_RESULTS.json'):
    """Correlate the fitted α_ISL of each galaxy with its V_flat proxy and save a summary.

    Steps, each printed to stdout:
      1. Pearson correlation between V_flat and α_ISL.
      2. Log-log linear fit of α_ISL = A * V_flat^β.
      3. Mean and relative scatter of α_ISL below / at-or-above 100 km/s.
      4. Galaxies whose α_ISL lies more than 2 standard deviations of the
         residuals away from the power-law prediction.

    Fits whose ``fit_quality`` is ``'failed'`` and galaxies whose raw data
    cannot be loaded are skipped.

    Args:
        output_path: Where the JSON summary is written. Defaults to the report
            path this script has always used.

    Returns:
        None. If fewer than two galaxies have usable data, a message is
        printed and no file is written.
    """
    print("="*60)
    print("SPARC Correlation Analysis: α_ISL vs V_flat")
    print("="*60)

    results = load_results()
    valid_results = [r for r in results if r['fit_quality'] != 'failed']

    print(f"Analyzing {len(valid_results)} valid fits...")

    analysis_data = []
    for r in valid_results:
        raw_data = load_galaxy_data(r['name'])
        if raw_data is None or np.size(raw_data) == 0:
            continue

        # A single-row file may arrive as a 1-D array; normalise so that the
        # column index below always refers to V_obs.
        raw_data = np.atleast_2d(raw_data)
        if raw_data.shape[1] < 2:
            continue

        analysis_data.append({
            'name': r['name'],
            'alpha_isl': r['isl_alpha_isl'],
            'v_flat': _v_flat_proxy(raw_data[:, 1]),
            'fit_quality': r['fit_quality'],
            'chi2_red': r['isl_red_chi2']
        })

    # Correlation and a straight-line fit both need at least two points.
    if len(analysis_data) < 2:
        print("\nNot enough galaxies with usable rotation data for a correlation analysis.")
        return

    # Convert to arrays for vector ops
    v_flats = np.array([d['v_flat'] for d in analysis_data])
    alphas = np.array([d['alpha_isl'] for d in analysis_data])

    # 1. Overall Correlation
    corr_coef = np.corrcoef(v_flats, alphas)[0, 1]
    print(f"\n1. Overall Correlation (Pearson r): {corr_coef:.4f}")

    # 2. Power Law Fit: alpha = A * v_flat^B, as a linear fit in log-log space.
    # Both quantities must be strictly positive for the logarithm to exist.
    mask = (alphas > 0) & (v_flats > 0)
    if np.count_nonzero(mask) >= 2:
        coeffs = np.polyfit(np.log10(v_flats[mask]), np.log10(alphas[mask]), 1)
        beta = coeffs[0]
        A = 10**coeffs[1]

        print(f"\n2. Power Law Fit: α_ISL = {A:.4f} * (V_flat)^{beta:.4f}")
        if abs(beta) > 0.3:
            print("   -> Strong mass dependence detected!")
        else:
            print("   -> Weak mass dependence.")
    else:
        A = beta = None
        print("\n2. Power Law Fit: skipped (fewer than 2 galaxies with positive α_ISL and V_flat)")

    # 3. Bimodal Split Test (at 100 km/s)
    threshold = 100.0
    dwarfs = [d for d in analysis_data if d['v_flat'] < threshold]
    giants = [d for d in analysis_data if d['v_flat'] >= threshold]

    print("\n3. Bimodal Split Test (Threshold: 100 km/s)")
    _print_group(dwarfs, "Dwarfs (V < 100)")
    _print_group(giants, "Giants (V >= 100)")

    # 4. Outlier Hunt
    print("\n4. Outlier Hunt")
    if A is None:
        print("  Skipped: no power law fit available.")
    else:
        # Defined as > 2 std dev from power law prediction (linear residuals,
        # evaluated over every galaxy, not only those used in the fit).
        residuals = alphas - A * (v_flats ** beta)
        std_res = np.std(residuals)
        outliers = [d for d, res in zip(analysis_data, residuals) if abs(res) > 2 * std_res]

        print(f"  Found {len(outliers)} outliers deviating from power law:")
        for o in outliers[:5]:
            print(f"  - {o['name']}: V={o['v_flat']:.1f}, α={o['alpha_isl']:.3f} (Qual: {o['fit_quality']})")
        if len(outliers) > 5:
            print(f"    ...and {len(outliers)-5} more.")

    # Save Analysis. Group statistics come from the same helper that produced
    # the printed values, so the file and the console always agree.
    output = {
        'correlation_r': _json_number(corr_coef),
        'power_law': {'A': _json_number(A), 'beta': _json_number(beta)},
        'bimodal_split': {
            'threshold': threshold,
            'dwarfs': _group_summary(dwarfs),
            'giants': _group_summary(giants)
        }
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
