#!/usr/bin/env python3
"""
Batch KU Validation Tool
Validates all KUs in the atoms directory and generates a report.
"""

import json
from pathlib import Path
from collections import defaultdict

def load_ku_binary(ku_file: Path) -> dict:
    """Read a binary ``.ku`` file and return the JSON object embedded in it.

    Everything before the first ``{`` byte is treated as a header and
    discarded; the remainder of the file is decoded as UTF-8 and parsed as
    JSON.

    Args:
        ku_file: Path of the ``.ku`` file to read.

    Returns:
        The parsed JSON payload.

    Raises:
        ValueError: If the file contains no ``{`` byte at all, or if the
            payload is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
        OSError: If the file cannot be opened or read.
    """
    with open(ku_file, 'rb') as handle:
        raw = handle.read()

    # Split at the first opening brace: header bytes | '{' | rest of payload.
    _header, brace, remainder = raw.partition(b'{')
    if not brace:
        raise ValueError(f"No JSON found in {ku_file}")

    # NOTE: bytes that are not valid UTF-8 are silently dropped rather than
    # reported, so a payload containing stray binary bytes may still parse.
    text = (brace + remainder).decode('utf-8', errors='ignore')
    return json.loads(text)

def validate_ku_structure(ku: dict) -> tuple:
    """Check that a KU has the required fields with acceptable types.

    The required fields are ``id`` and ``domain`` (each a non-empty string)
    and ``invariant`` (a dict or a string, possibly empty).

    Args:
        ku: The parsed Knowledge Unit.

    Returns:
        A ``(is_valid, errors)`` pair. ``errors`` lists every problem found:
        first one message per missing field, then one message per field that
        is present but malformed. ``is_valid`` is ``True`` exactly when
        ``errors`` is empty.
    """
    def _nonempty_str(value) -> bool:
        return isinstance(value, str) and bool(value)

    def _dict_or_str(value) -> bool:
        return isinstance(value, (dict, str))

    # (field name, acceptance predicate, message when the predicate fails)
    rules = (
        ('id', _nonempty_str, "Invalid ID: must be non-empty string"),
        ('domain', _nonempty_str, "Invalid domain: must be non-empty string"),
        ('invariant', _dict_or_str, "Invalid invariant: must be dict or string"),
    )

    # Missing-field messages are reported ahead of any type/value messages.
    missing = [
        f"Missing required field: {name}"
        for name, _accepts, _message in rules
        if name not in ku
    ]
    malformed = [
        message
        for name, accepts, message in rules
        if name in ku and not accepts(ku[name])
    ]

    problems = missing + malformed
    return (not problems, problems)

def main(atoms_dir=None):
    """Validate every ``*.ku`` file in a directory and report the results.

    Each file is loaded with ``load_ku_binary`` and checked with
    ``validate_ku_structure``. A summary is printed to stdout and a JSON
    report named ``validation_report.json`` is written to the parent of the
    atoms directory.

    Args:
        atoms_dir: Directory containing the ``.ku`` files, as a ``Path`` or
            string. When omitted, the tool's original hard-coded location is
            used, so existing ``main()`` callers behave as before.

    Returns:
        None. If the directory does not exist, an error line is printed and
        no report is written.
    """
    if atoms_dir is None:
        atoms_dir = '/home/shri/Desktop/nanowiki/nanocern_cli/atoms'
    atoms_dir = Path(atoms_dir)

    print("="*80)
    print("NANOCERN KU VALIDATION REPORT")
    print("="*80)
    print()

    # A missing directory would otherwise look like "0 files, all fine" (or
    # crash later when the report is opened), so report it explicitly.
    if not atoms_dir.is_dir():
        print(f"❌ Atoms directory not found: {atoms_dir}")
        print("="*80)
        return

    ku_files = sorted(atoms_dir.glob('*.ku'))

    stats = {
        'total_files': len(ku_files),
        'loaded_successfully': 0,
        'load_failed': 0,
        'validation_passed': 0,
        'validation_failed': 0,
        'domains': defaultdict(int),
        'load_errors': [],
        'validation_errors': []
    }

    for ku_file in ku_files:
        # Only the load step is guarded: a file is counted either as loaded
        # or as failed, never both, so the two counters always add up to
        # the total number of files.
        try:
            ku = load_ku_binary(ku_file)
        except Exception as e:  # batch boundary: record and keep going
            stats['load_failed'] += 1
            stats['load_errors'].append((ku_file.name, str(e)))
            continue

        stats['loaded_successfully'] += 1

        is_valid, errors = validate_ku_structure(ku)
        if is_valid:
            stats['validation_passed'] += 1
            domain = ku.get('domain', 'unknown')
            stats['domains'][domain] += 1
        else:
            stats['validation_failed'] += 1
            stats['validation_errors'].append((ku_file.name, errors))

    # Print summary
    print(f"📊 Total KU Files: {stats['total_files']}")
    print(f"✅ Loaded Successfully: {stats['loaded_successfully']}")
    print(f"❌ Load Failed: {stats['load_failed']}")
    print(f"✅ Validation Passed: {stats['validation_passed']}")
    print(f"❌ Validation Failed: {stats['validation_failed']}")
    print()

    # Domains are listed most-populated first.
    print("🏷️  Valid KUs by Domain:")
    for domain, count in sorted(stats['domains'].items(), key=lambda x: x[1], reverse=True):
        print(f"  {domain:20s}: {count:4d}")
    print()

    # Print errors (first 10, each message truncated to 80 characters)
    if stats['load_errors']:
        print(f"⚠️  Load Errors ({len(stats['load_errors'])} total, showing first 10):")
        for file_name, error in stats['load_errors'][:10]:
            print(f"  {file_name}: {error[:80]}")
        print()

    if stats['validation_errors']:
        print(f"⚠️  Validation Errors ({len(stats['validation_errors'])} total):")
        for file_name, errors in stats['validation_errors']:
            print(f"  {file_name}:")
            for error in errors:
                print(f"    - {error}")
        print()

    # Save report next to the atoms directory
    report_path = atoms_dir.parent / 'validation_report.json'
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump({
            'summary': {
                'total_files': stats['total_files'],
                'loaded_successfully': stats['loaded_successfully'],
                'load_failed': stats['load_failed'],
                'validation_passed': stats['validation_passed'],
                'validation_failed': stats['validation_failed']
            },
            'domains': dict(stats['domains']),
            'load_errors': stats['load_errors'][:50],  # Limit to 50
            'validation_errors': stats['validation_errors']
        }, f, indent=2)

    print(f"📄 Full report saved to: {report_path}")
    print("="*80)

# Run the validation only when this file is executed as a script, so that
# importing the module (e.g. to reuse the helpers) has no side effects.
if __name__ == '__main__':
    main()
