<?php
/**
 * AIGate Stats Generator
 * 
 * Walks all subdirectories of ozekidata/services/aigate/logs and creates
 * a summary.json file with aggregated token usage statistics.
 * 
 * Supports incremental updates by tracking directory modification times.
 */

declare(strict_types=1);

// Configuration
// Root of the log tree: four directory levels above this script's directory,
// then ozekidata/services/aigate/logs.
define('LOG_BASE_DIR', dirname(__DIR__, 4) . '/ozekidata/services/aigate/logs');
// Aggregated statistics file, written next to this script; it is also read back
// on later runs to reuse the entries of unchanged directories.
define('OUTPUT_FILE', __DIR__ . '/summary.json');
// Per-directory modification times remembered between runs, stored next to this script.
define('STATE_FILE', __DIR__ . '/summary_state.json');

/**
 * Entry point: rebuild summary.json from the index.jlog file of every log directory.
 *
 * A directory whose recorded modification time equals the one stored in the
 * state file is not parsed again; its entry is copied from the previously
 * written summary. Every other directory (new, modified, or without a usable
 * cached entry) is parsed from scratch. Progress and a final report are echoed
 * to standard output.
 *
 * Note: summary and state entries are keyed by the directory's base name, so
 * two log directories with the same name under different parents share one entry.
 */
function main(): void
{
    $startTime = microtime(true);

    // Modification times recorded by the previous run (empty on a first run).
    $state = loadState();
    $knownMtimes = $state['directories'] ?? [];
    if (!is_array($knownMtimes)) {
        $knownMtimes = [];
    }

    // Initialize summary structure
    $summary = [
        'last_updated' => date('c'),
        'directories' => [],
        'overall' => [
            'total_tokens' => 0,
            'by_user' => [],
            'by_apikey' => [],
            'by_provider' => [],
            'by_model' => []
        ]
    ];

    // Without a log directory there is nothing to aggregate: write the empty summary.
    if (!is_dir(LOG_BASE_DIR)) {
        echo "Log directory not found: " . LOG_BASE_DIR . "\n";
        saveSummary($summary);
        return;
    }

    $directories = scanLogDirectories(LOG_BASE_DIR);
    echo "Found " . count($directories) . " directories to process\n";

    // Decode the previous summary once; the file is only rewritten after the
    // loop, so re-reading it for every unchanged directory would be wasted work.
    $previousEntries = loadPreviousDirectoryEntries();

    // State is rebuilt from the directories seen in this run, which drops
    // entries of directories that no longer exist.
    $currentMtimes = [];
    $processedCount = 0;
    $skippedCount = 0;

    foreach ($directories as $dirPath) {
        $dirName = basename($dirPath);
        $dirMtime = getDirectoryMtime($dirPath);
        $currentMtimes[$dirName] = $dirMtime;

        $unchanged = isset($knownMtimes[$dirName]) && $knownMtimes[$dirName] === $dirMtime;
        $cachedData = $unchanged ? ($previousEntries[$dirName] ?? null) : null;

        // Reuse the cached entry only if it has the expected shape; otherwise
        // fall through and parse the directory again.
        if (is_array($cachedData) && is_array($cachedData['tokens'] ?? null)) {
            $summary['directories'][$dirName] = $cachedData;
            mergeIntoOverall($summary['overall'], $cachedData['tokens']);
            $skippedCount++;
            continue;
        }

        echo "Processing: $dirName\n";
        $dirData = processDirectory($dirPath, $dirMtime);

        if (!empty($dirData)) {
            $summary['directories'][$dirName] = $dirData;
            mergeIntoOverall($summary['overall'], $dirData['tokens']);
            $processedCount++;
        }
    }

    // Save outputs
    $state['directories'] = $currentMtimes;
    saveSummary($summary);
    saveState($state);

    $duration = round((microtime(true) - $startTime) * 1000, 2);

    echo "\n=== Summary ===\n";
    echo "Processed: $processedCount directories\n";
    echo "Skipped (unchanged): $skippedCount directories\n";
    echo "Total tokens: " . number_format($summary['overall']['total_tokens']) . "\n";
    echo "Duration: {$duration}ms\n";
    echo "Output: " . OUTPUT_FILE . "\n";
}

/**
 * Read the per-directory entries of the previously written summary file.
 *
 * @return array Entries keyed by directory name; empty when the file is
 *               missing, unreadable, or does not contain the expected structure.
 */
function loadPreviousDirectoryEntries(): array
{
    if (!is_file(OUTPUT_FILE)) {
        return [];
    }

    $raw = file_get_contents(OUTPUT_FILE);
    if ($raw === false) {
        return [];
    }

    $decoded = json_decode($raw, true);
    $entries = is_array($decoded) ? ($decoded['directories'] ?? null) : null;

    return is_array($entries) ? $entries : [];
}

/**
 * Find every log directory below $baseDir, at any depth.
 *
 * A directory qualifies when its name is exactly six digits (YYYYMM) or eight
 * digits (YYYYMMDD) and it directly contains an index.jlog file. The check
 * uses is_file() because index.jlog is opened and read line by line as a
 * regular file by processDirectory(); testing it with is_dir() never matches
 * such a file and yields an empty result.
 *
 * @param string $baseDir Root directory to walk.
 * @return array Full paths of the matching directories, sorted ascending;
 *               empty when $baseDir is not a directory.
 */
function scanLogDirectories(string $baseDir): array
{
    if (!is_dir($baseDir)) {
        return [];
    }

    // SELF_FIRST makes the iterator yield directories themselves, not only leaves.
    $iterator = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator($baseDir, RecursiveDirectoryIterator::SKIP_DOTS),
        RecursiveIteratorIterator::SELF_FIRST
    );

    $directories = [];

    foreach ($iterator as $item) {
        if (!$item->isDir()) {
            continue;
        }

        // Match YYYYMM or YYYYMMDD pattern
        if (preg_match('/^(?:\d{6}|\d{8})$/', $item->getFilename()) !== 1) {
            continue;
        }

        if (is_file($item->getPathname() . '/index.jlog')) {
            $directories[] = $item->getPathname();
        }
    }

    sort($directories);
    return $directories;
}

/**
 * Get the most recent modification time of a directory and the files below it.
 *
 * The directory's own mtime is the starting value; every regular file found
 * recursively beneath it can only raise the result. filemtime() returns false
 * on failure, so each result is checked explicitly: an unreadable directory
 * mtime counts as 0 and an unreadable file is skipped, which keeps the return
 * value a real integer.
 *
 * @param string $dirPath Directory to inspect.
 * @return int Unix timestamp of the newest modification found.
 */
function getDirectoryMtime(string $dirPath): int
{
    $ownMtime = filemtime($dirPath);
    $maxMtime = $ownMtime === false ? 0 : $ownMtime;

    // Default iteration mode yields leaves only; isFile() filters out the rest.
    $iterator = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS)
    );

    foreach ($iterator as $item) {
        if (!$item->isFile()) {
            continue;
        }

        $fileMtime = filemtime($item->getPathname());
        if ($fileMtime !== false && $fileMtime > $maxMtime) {
            $maxMtime = $fileMtime;
        }
    }

    return $maxMtime;
}

/**
 * Aggregate the token usage recorded in one directory's index.jlog file.
 *
 * The file is read as JSON Lines. Blank lines, lines that do not decode to a
 * JSON object/array, and records whose input + output token count is zero are
 * skipped. Each remaining record adds its token count to the directory total
 * and to one label in each of the four breakdowns (user, API key, provider,
 * model).
 *
 * A label that is missing, empty, otherwise falsy (including "0"), or not a
 * scalar is counted under "unknown". Non-scalar values must be rejected here
 * because they cannot be used as array keys.
 *
 * @param string $dirPath  Directory containing index.jlog.
 * @param int    $dirMtime Unix timestamp reported as the entry's last_modified.
 * @return array|null ['last_modified' => ISO 8601 string, 'tokens' => [...]],
 *                    with each breakdown sorted by token count descending;
 *                    null when index.jlog is missing or cannot be opened.
 */
function processDirectory(string $dirPath, int $dirMtime): ?array
{
    $jlogPath = $dirPath . '/index.jlog';

    if (!file_exists($jlogPath)) {
        return null;
    }

    $handle = fopen($jlogPath, 'r');
    if ($handle === false) {
        echo "  Warning: Could not open $jlogPath\n";
        return null;
    }

    // Breakdown name => record field that supplies its label.
    $dimensions = [
        'by_user' => 'userid',
        'by_apikey' => 'apikeyhash',
        'by_provider' => 'providerid',
        'by_model' => 'requestmodel',
    ];

    $tokens = [
        'total' => 0,
        'by_user' => [],
        'by_apikey' => [],
        'by_provider' => [],
        'by_model' => []
    ];

    // Turn a raw field value into a usable array key.
    $toLabel = static function ($value): string {
        if (!is_scalar($value) || empty($value)) {
            return 'unknown';
        }
        return (string) $value;
    };

    // Turn a raw token field into an integer; missing or non-scalar counts as 0.
    $toCount = static function ($value): int {
        return is_scalar($value) ? (int) $value : 0;
    };

    while (($line = fgets($handle)) !== false) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        // Malformed JSON decodes to null and bare scalars carry no fields,
        // so anything that is not an array has nothing to contribute.
        $record = json_decode($line, true);
        if (!is_array($record)) {
            continue;
        }

        $totalTokens = $toCount($record['inputtokens'] ?? null)
            + $toCount($record['outputtokens'] ?? null);

        if ($totalTokens === 0) {
            continue;
        }

        foreach ($dimensions as $bucket => $field) {
            $label = $toLabel($record[$field] ?? '');
            $tokens[$bucket][$label] = ($tokens[$bucket][$label] ?? 0) + $totalTokens;
        }

        $tokens['total'] += $totalTokens;
    }

    fclose($handle);

    // Sort arrays by token count (descending)
    foreach (array_keys($dimensions) as $bucket) {
        arsort($tokens[$bucket]);
    }

    return [
        'last_modified' => date('c', $dirMtime),
        'tokens' => $tokens
    ];
}

/**
 * Fold one directory's token counts into the running grand totals.
 *
 * Adds the directory total to $overall['total_tokens'], adds every per-label
 * count to the matching label in each breakdown (creating labels on first
 * sight), then re-sorts every breakdown by token count, descending.
 *
 * @param array $overall   Running totals, modified in place.
 * @param array $dirTokens One directory's 'tokens' structure.
 */
function mergeIntoOverall(array &$overall, array $dirTokens): void
{
    $buckets = ['by_user', 'by_apikey', 'by_provider', 'by_model'];

    $overall['total_tokens'] += $dirTokens['total'];

    // First pass: accumulate the counts of every breakdown.
    foreach ($buckets as $bucket) {
        foreach ($dirTokens[$bucket] as $label => $count) {
            $overall[$bucket][$label] = ($overall[$bucket][$label] ?? 0) + $count;
        }
    }

    // Second pass: keep each breakdown ordered by token count.
    foreach ($buckets as $bucket) {
        arsort($overall[$bucket]);
    }
}

/**
 * Load the state remembered from the previous run.
 *
 * Falls back to an empty state when the state file is missing, unreadable, or
 * does not decode to an array. Valid JSON that is not an object (for example
 * `null` or a number) must be rejected too: returning it would violate the
 * declared array return type.
 *
 * @return array State with a 'directories' key that is always an array.
 */
function loadState(): array
{
    $emptyState = ['directories' => []];

    if (!file_exists(STATE_FILE)) {
        return $emptyState;
    }

    $content = file_get_contents(STATE_FILE);
    if ($content === false) {
        return $emptyState;
    }

    $state = json_decode($content, true);
    if (!is_array($state)) {
        return $emptyState;
    }

    // Callers index into 'directories', so make sure it is usable.
    if (!isset($state['directories']) || !is_array($state['directories'])) {
        $state['directories'] = [];
    }

    return $state;
}

/**
 * Write the state to the state file as pretty-printed JSON.
 *
 * Failures are reported on standard output instead of being ignored. When the
 * state cannot be encoded, the existing file is left untouched rather than
 * being overwritten with empty content.
 *
 * @param array $state State to persist.
 */
function saveState(array $state): void
{
    $json = json_encode($state, JSON_PRETTY_PRINT);
    if ($json === false) {
        echo "  Warning: Could not encode state: " . json_last_error_msg() . "\n";
        return;
    }

    if (file_put_contents(STATE_FILE, $json) === false) {
        echo "  Warning: Could not write " . STATE_FILE . "\n";
    }
}

/**
 * Look up one directory's entry in the previously written summary file.
 *
 * Returns null when the file is missing, unreadable, or not valid, when it has
 * no entry for $dirName, or when the entry lacks a well-formed 'tokens'
 * structure (numeric 'total' plus the four breakdown arrays). Rejecting
 * malformed entries lets the caller rebuild the directory instead of failing
 * on the return type or on missing keys.
 *
 * @param string $dirName Directory name used as key in the summary.
 * @return array|null The cached entry, or null if none is usable.
 */
function loadCachedDirectoryData(string $dirName): ?array
{
    if (!file_exists(OUTPUT_FILE)) {
        return null;
    }

    $content = file_get_contents(OUTPUT_FILE);
    if ($content === false) {
        return null;
    }

    $summary = json_decode($content, true);
    if (!is_array($summary) || !is_array($summary['directories'] ?? null)) {
        return null;
    }

    $entry = $summary['directories'][$dirName] ?? null;
    if (!is_array($entry) || !is_array($entry['tokens'] ?? null)) {
        return null;
    }

    $tokens = $entry['tokens'];
    $total = $tokens['total'] ?? null;
    if (!is_int($total) && !is_float($total)) {
        return null;
    }

    foreach (['by_user', 'by_apikey', 'by_provider', 'by_model'] as $bucket) {
        if (!is_array($tokens[$bucket] ?? null)) {
            return null;
        }
    }

    return $entry;
}

/**
 * Write the summary to the output file as pretty-printed JSON.
 *
 * Failures are reported on standard output instead of being ignored. When the
 * summary cannot be encoded, the existing file is left untouched so that the
 * previously written data is not replaced with empty content.
 *
 * @param array $summary Summary structure to persist.
 */
function saveSummary(array $summary): void
{
    $json = json_encode($summary, JSON_PRETTY_PRINT);
    if ($json === false) {
        echo "  Warning: Could not encode summary: " . json_last_error_msg() . "\n";
        return;
    }

    if (file_put_contents(OUTPUT_FILE, $json) === false) {
        echo "  Warning: Could not write " . OUTPUT_FILE . "\n";
    }
}

// Run the script: main() is called unconditionally whenever this file is
// executed or included.
main();