Files
RecruIT/app/Services/AIAnalysisService.php

453 lines
17 KiB
PHP

<?php
namespace App\Services;
use App\Models\Candidate;
use App\Models\Document;
use Smalot\PdfParser\Parser;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
class AIAnalysisService
{
protected $parser;
/**
 * Set up the smalot/pdfparser instance used to read candidate PDFs.
 */
public function __construct()
{
$this->parser = new Parser();
}
/**
 * Run the AI screening of a candidate against the job position they applied to.
 *
 * Loads the candidate's documents and job position, extracts the raw text from
 * the uploaded PDFs, then delegates the actual scoring to the configured
 * provider via callAI().
 *
 * @param Candidate   $candidate the candidate to analyze
 * @param string|null $provider  optional provider override ('openai', 'anthropic', 'gemini', ...)
 * @return array normalized analysis payload
 * @throws \Exception when the candidate has no job position or the CV text cannot be extracted
 */
public function analyze(Candidate $candidate, ?string $provider = null)
{
    if (!$candidate->job_position_id) {
        throw new \Exception("Le candidat n'est associé à aucune fiche de poste.");
    }

    $candidate->load(['documents', 'jobPosition']);

    $resumeText = $this->extractTextFromDocument($candidate->documents->firstWhere('type', 'cv'));
    $coverText  = $this->extractTextFromDocument($candidate->documents->firstWhere('type', 'cover_letter'));

    if (!$resumeText) {
        throw new \Exception("Impossible d'extraire le texte du CV.");
    }

    return $this->callAI($candidate, $resumeText, $coverText, $provider);
}
/**
 * Pull the plain text out of a stored PDF document.
 *
 * Returns null when no document is given, when the file is missing from the
 * local disk, or when the PDF cannot be parsed (the parse error is logged).
 */
protected function extractTextFromDocument(?Document $document): ?string
{
    if ($document === null) {
        return null;
    }

    $disk = Storage::disk('local');
    if (!$disk->exists($document->file_path)) {
        return null;
    }

    try {
        $raw = $this->parser->parseFile($disk->path($document->file_path))->getText();
        return $this->cleanText($raw);
    } catch (\Exception $e) {
        Log::error("PDF Extraction Error: " . $e->getMessage());
        return null;
    }
}
/**
 * Normalize extracted PDF text so it embeds safely in a JSON prompt.
 *
 * Drops invalid UTF-8 byte sequences, strips control characters (newlines,
 * carriage returns and tabs are kept, as are Latin-1 accented letters), and
 * trims surrounding whitespace.
 */
protected function cleanText(string $text): string
{
    // Re-encoding UTF-8 -> UTF-8 discards any invalid byte sequences.
    $utf8 = mb_convert_encoding($text, 'UTF-8', 'UTF-8');

    // Keep printable ASCII, Latin-1 (\xA0-\xFF) and tab/LF/CR; drop the rest.
    $printable = preg_replace('/[^\x20-\x7E\xA0-\xFF\x0A\x0D\x09]/u', '', $utf8);

    return trim($printable);
}
/**
 * Build the full prompt and dispatch the analysis to the selected AI provider.
 *
 * The prompt is assembled in two parts:
 *  - a static part (job description, requirements, analysis instructions or a
 *    job-specific override prompt) identical for every candidate of a job;
 *  - a dynamic part (CV text + cover letter text) that changes per candidate.
 * Gemini receives both parts separately so the static part can go through
 * context caching; the other providers receive the concatenation.
 *
 * @param Candidate   $candidate  candidate being analyzed (jobPosition relation is read)
 * @param string      $cvText     extracted CV text
 * @param string|null $letterText extracted cover-letter text, if any
 * @param string|null $provider   'openai' | 'anthropic' | 'gemini'; any other value
 *                                falls back to Ollama. Defaults to env AI_DEFAULT_PROVIDER.
 * @return array normalized analysis with 'provider' and 'analyzed_at' metadata added
 */
protected function callAI(Candidate $candidate, string $cvText, ?string $letterText, ?string $provider = null)
{
$provider = $provider ?: env('AI_DEFAULT_PROVIDER', 'ollama');
$job = $candidate->jobPosition;
// --- BYPASS LOGIC ---
// The job position may fully replace the base prompt with its own ai_prompt.
if ($job->ai_bypass_base_prompt && !empty($job->ai_prompt)) {
$staticPrompt = $job->ai_prompt;
// We still append the JSON requirement to ensure the frontend doesn't crash,
// unless the user specifically asked for "pure" takeover.
// Most users want to control the "logic" not the "serialization format".
if (!str_contains(strtolower($staticPrompt), 'json')) {
$staticPrompt .= "\n\nRéponds UNIQUEMENT en JSON pur. Format attendu:\n" . config('ai.defaults.json_format');
}
} else {
// --- STANDARD LOGIC ---
// Base instructions from config
$baseInstruction = config('ai.defaults.base_instruction');
$jsonFormat = config('ai.defaults.json_format');
$staticPrompt = "{$baseInstruction} Ton rôle est d'analyser le profil d'un candidat pour le poste de '{$job->title}'.\n\n";
$staticPrompt .= "DESCRIPTION DU POSTE:\n{$job->description}\n\n";
if (!empty($job->requirements)) {
$staticPrompt .= "COMPÉTENCES REQUISES:\n" . implode(", ", $job->requirements) . "\n\n";
}
if (!$job->ai_prompt) {
// Default generalist analysis instructions
$staticPrompt .= "CONSIGNES D'ANALYSE:\n" . config('ai.defaults.analysis_instructions') . "\n\n";
} else {
// Specific instructions from the job position
$staticPrompt .= "CONSIGNES D'ANALYSE SPÉCIFIQUES:\n" . $job->ai_prompt . "\n\n";
}
$staticPrompt .= "FORMAT DE RÉPONSE ATTENDU:\n{$jsonFormat}\n";
}
// Appended in every case (including bypass) to force a parseable response.
$staticPrompt .= "\nRéponds UNIQUEMENT en JSON pur, sans texte avant ou après. Assure-toi que le JSON est valide.";
// Dynamic Part: The candidate data (Not cached)
$dynamicPrompt = "CONTENU DU CV DU CANDIDAT:\n{$cvText}\n\nCONTENU DE LA LETTRE DE MOTIVATION:\n" . ($letterText ?? "Non fournie");
// Full prompt for providers not using context caching
$fullPrompt = $staticPrompt . "\n\n" . $dynamicPrompt;
// Gemini gets static/dynamic separately (context caching); others get the concatenation.
$analysis = match ($provider) {
'openai' => $this->callOpenAI($fullPrompt),
'anthropic' => $this->callAnthropic($fullPrompt),
'gemini' => $this->callGemini($dynamicPrompt, $staticPrompt, $job),
default => $this->callOllama($fullPrompt),
};
// Normalize keys for frontend compatibility
$normalized = $this->normalizeAnalysis($analysis);
// Inject metadata
$normalized['provider'] = $provider;
$normalized['analyzed_at'] = now()->toIso8601String();
return $normalized;
}
/**
 * Map a raw AI payload onto the key set the frontend expects.
 *
 * Handles French / alternative key names (score_global, points_forts, ...),
 * flattens lists of structured objects into plain strings, coerces the match
 * score into an integer capped at 100 (tolerating "18/20", "0,85", "85%"...),
 * and guarantees every expected key exists in the result.
 */
protected function normalizeAnalysis(array $data): array
{
    $result = $data;

    // Alias keys some models emit -> canonical keys. Both sides of the check
    // read the raw payload, so when several aliases for the same target are
    // present the last one in this list wins (matches the original behavior).
    $aliases = [
        'score_global' => 'match_score',
        'score' => 'match_score',
        'points_forts' => 'strengths',
        'points_faibles' => 'gaps',
        'recommandation' => 'verdict',
        'synthese' => 'summary',
    ];
    foreach ($aliases as $source => $target) {
        if (isset($data[$source]) && !isset($data[$target])) {
            $result[$target] = $data[$source];
        }
    }

    // Turn a list that may contain structured objects into a list of strings,
    // preserving the original keys.
    $stringify = static function ($list): array {
        if (!is_array($list)) {
            return [];
        }
        $out = [];
        foreach ($list as $key => $item) {
            if (!is_array($item)) {
                $out[$key] = (string) $item;
                continue;
            }
            $label = $item['type'] ?? $item['title'] ?? $item['category'] ?? null;
            $text  = $item['description'] ?? $item['value'] ?? $item['content'] ?? null;
            if ($label && $text) {
                $out[$key] = "{$label} : {$text}";
            } elseif ($text) {
                $out[$key] = $text;
            } elseif ($label) {
                $out[$key] = $label;
            } else {
                $out[$key] = json_encode($item);
            }
        }
        return $out;
    };

    foreach (['strengths', 'gaps', 'elements_bloquants'] as $listKey) {
        if (isset($result[$listKey])) {
            $result[$listKey] = $stringify($result[$listKey]);
        }
    }

    // Coerce the score into an int, absorbing common AI formatting quirks.
    if (isset($result['match_score'])) {
        $raw = $result['match_score'];
        $value = $raw;
        if (is_string($value)) {
            // "18/20" -> keep the numerator only.
            if (str_contains($value, '/')) {
                $value = explode('/', $value)[0];
            }
            // European decimal comma, then strip everything but digits/dots.
            $value = preg_replace('/[^0-9.]/', '', str_replace(',', '.', $value));
        }
        $number = (float) $value;
        // A ratio such as 0.85 almost certainly means 85%; a literal integer 1
        // may honestly be 1/100, so native ints are never rescaled.
        if ($number > 0 && $number < 1.1 && !is_int($raw)) {
            if ($number < 1 || str_contains((string) $raw, '.')) {
                $number *= 100;
            }
        }
        $result['match_score'] = (int) min(100, round($number));
    }

    // Guarantee every key the frontend reads, even when the AI omitted it.
    $result['match_score'] ??= 0;
    $result['summary'] ??= "Pas de résumé généré.";
    $result['verdict'] ??= "Indéterminé";
    $result['strengths'] ??= [];
    $result['gaps'] ??= [];
    $result['scores_detailles'] ??= null;
    $result['elements_bloquants'] ??= [];
    $result['questions_entretien_suggerees'] ??= [];
    return $result;
}
/**
 * Ask a local Ollama instance for the analysis.
 *
 * Uses the JSON output mode of /api/generate. Falls back to the simulated
 * analysis when the server is unreachable, answers with an error status, or
 * returns a payload that is not decodable JSON.
 *
 * @return array decoded analysis, or the simulated fallback
 */
protected function callOllama(string $prompt)
{
    $ollamaUrl = env('OLLAMA_URL', 'http://localhost:11434/api/generate');
    $ollamaModel = env('OLLAMA_MODEL', 'mistral');
    try {
        $response = Http::timeout(120)->post($ollamaUrl, [
            'model' => $ollamaModel,
            'prompt' => $prompt,
            'stream' => false,
            'format' => 'json'
        ]);
        if ($response->successful()) {
            // Guard against malformed model output: returning null here would
            // make normalizeAnalysis(array $data) throw a TypeError upstream.
            $decoded = json_decode((string) $response->json('response'), true);
            if (is_array($decoded)) {
                return $decoded;
            }
            Log::warning("AI Provider Error (Ollama): response was not valid JSON");
        } else {
            Log::warning("AI Provider Error (Ollama): HTTP " . $response->status() . " - " . $response->body());
        }
    } catch (\Exception $e) {
        Log::error("AI Connection Failed (Ollama): " . $e->getMessage());
    }
    return $this->getSimulatedAnalysis();
}
/**
 * Ask the OpenAI Chat Completions API (gpt-4o, JSON mode) for the analysis.
 *
 * Falls back to the simulated analysis when no API key is configured, on any
 * HTTP/connection error, or when the response content is not decodable JSON
 * (a null return would break normalizeAnalysis(array $data) upstream).
 *
 * @return array decoded analysis, or the simulated fallback
 */
protected function callOpenAI(string $prompt)
{
    $apiKey = env('OPENAI_API_KEY');
    if (!$apiKey) return $this->getSimulatedAnalysis();
    try {
        $response = Http::withToken($apiKey)->timeout(60)->post('https://api.openai.com/v1/chat/completions', [
            'model' => 'gpt-4o',
            'messages' => [['role' => 'user', 'content' => $prompt]],
            'response_format' => ['type' => 'json_object']
        ]);
        if ($response->successful()) {
            $decoded = json_decode((string) $response->json('choices.0.message.content'), true);
            if (is_array($decoded)) {
                return $decoded;
            }
            Log::warning("OpenAI Analysis: response content was not valid JSON");
        } else {
            // Log non-2xx responses for parity with the other providers.
            Log::warning("OpenAI Analysis: HTTP " . $response->status() . " - " . $response->body());
        }
    } catch (\Exception $e) {
        Log::error("OpenAI Analysis Failed: " . $e->getMessage());
    }
    return $this->getSimulatedAnalysis();
}
/**
 * Ask the Anthropic Messages API (Claude 3.5 Sonnet) for the analysis.
 *
 * Claude has no strict JSON mode, so the JSON object is extracted from the
 * free-form answer with extractJson() before decoding. Falls back to the
 * simulated analysis when no API key is configured, on HTTP/connection
 * errors, or when the extracted text is not decodable JSON (a null return
 * would break normalizeAnalysis(array $data) upstream).
 *
 * @return array decoded analysis, or the simulated fallback
 */
protected function callAnthropic(string $prompt)
{
    $apiKey = env('ANTHROPIC_API_KEY');
    if (!$apiKey) return $this->getSimulatedAnalysis();
    try {
        $response = Http::withHeaders([
            'x-api-key' => $apiKey,
            'anthropic-version' => '2023-06-01',
            'content-type' => 'application/json'
        ])->timeout(60)->post('https://api.anthropic.com/v1/messages', [
            'model' => 'claude-3-5-sonnet-20240620',
            'max_tokens' => 2048,
            'messages' => [['role' => 'user', 'content' => $prompt]]
        ]);
        if ($response->successful()) {
            // Cast: content.0.text may be absent (null), and preg_match on
            // null is deprecated since PHP 8.1.
            $content = (string) $response->json('content.0.text');
            $decoded = json_decode($this->extractJson($content), true);
            if (is_array($decoded)) {
                return $decoded;
            }
            Log::warning("Anthropic Analysis: response was not valid JSON");
        } else {
            Log::warning("Anthropic Analysis: HTTP " . $response->status() . " - " . $response->body());
        }
    } catch (\Exception $e) {
        Log::error("Anthropic Analysis Failed: " . $e->getMessage());
    }
    return $this->getSimulatedAnalysis();
}
/**
 * Query the Google Gemini API, trying several models in order until one
 * returns a decodable JSON analysis.
 *
 * When a static prompt and a job are given and the model is served from the
 * v1beta endpoint, the static part goes through Gemini context caching (see
 * getOrCreateContextCache) and only the dynamic part travels with the
 * request; otherwise the static part is prepended to the request text.
 * Falls back to the simulated analysis when every model fails.
 *
 * @param string      $dynamicPrompt candidate-specific prompt part (CV + letter)
 * @param string|null $staticPrompt  job-specific prompt part, cacheable
 * @param \App\Models\JobPosition|null $job job used to persist the cache id
 * @return array decoded analysis, or the simulated fallback
 */
protected function callGemini(string $dynamicPrompt, ?string $staticPrompt = null, ?\App\Models\JobPosition $job = null)
{
$apiKey = env('GEMINI_API_KEY');
if (!$apiKey) return $this->getSimulatedAnalysis();
// Models to try in order (Updated for 2026 models)
$models = [
'gemini-3.1-flash-lite-preview',
'gemini-3-flash-preview',
'gemini-1.5-flash-latest'
];
foreach ($models as $model) {
try {
// Newer model families are only exposed on the v1beta endpoint.
$version = (str_contains($model, '2.0') || str_contains($model, '3.')) ? 'v1beta' : 'v1';
$url = "https://generativelanguage.googleapis.com/{$version}/models/{$model}:generateContent?key=" . $apiKey;
// Low temperature + JSON mime type: deterministic, parseable output.
$generationConfig = [
'temperature' => 0.2,
'responseMimeType' => 'application/json'
];
$payload = [
'generationConfig' => $generationConfig,
'contents' => [
['role' => 'user', 'parts' => [['text' => $dynamicPrompt]]]
]
];
// Attempt to use Context Caching if static prompt and job are provided
if ($staticPrompt && $job && $version === 'v1beta') {
$cacheId = $this->getOrCreateContextCache($job, $staticPrompt, $model);
if ($cacheId) {
$payload['cachedContent'] = $cacheId;
// When using cache, the static part is already in the cache
} else {
// Fallback: prepend static part if cache fails
$payload['contents'][0]['parts'][0]['text'] = $staticPrompt . "\n\n" . $dynamicPrompt;
}
} else if ($staticPrompt) {
// Non-cached fallback
$payload['contents'][0]['parts'][0]['text'] = $staticPrompt . "\n\n" . $dynamicPrompt;
}
$response = Http::timeout(60)->post($url, $payload);
if ($response->successful()) {
$candidate = $response->json('candidates.0');
// A non-STOP finish (MAX_TOKENS, SAFETY, ...) may truncate the answer.
if (isset($candidate['finishReason']) && $candidate['finishReason'] !== 'STOP') {
Log::warning("Gemini warning: Analysis finished with reason " . $candidate['finishReason']);
}
$text = $candidate['content']['parts'][0]['text'] ?? null;
if ($text) {
$json = $this->extractJson($text);
$decoded = json_decode($json, true);
// Only accept a decodable payload; otherwise try the next model.
if ($decoded) return $decoded;
}
} else {
Log::error("Gemini API Error ($model): " . $response->status() . " - " . $response->body());
}
} catch (\Exception $e) {
Log::error("Gemini Connection Failed ($model): " . $e->getMessage());
}
}
return $this->getSimulatedAnalysis();
}
/**
 * Get or create a Gemini Context Cache for a specific Job Position.
 *
 * The cache resource name and its expiry are persisted on the JobPosition
 * model (gemini_cache_id / gemini_cache_expires_at) so later analyses of the
 * same job can reuse it. Returns the cache id, or null when caching is
 * skipped or fails — the caller then sends the full prompt instead.
 */
protected function getOrCreateContextCache(\App\Models\JobPosition $job, string $staticPrompt, string $model)
{
// Only cache large prompts — presumably to stay above Gemini's minimum
// cached-content token count and make caching worthwhile; TODO confirm
// the 120000-character threshold.
if (strlen($staticPrompt) < 120000) {
return null;
}
// Check if we already have a valid cache for this job
if ($job->gemini_cache_id && $job->gemini_cache_expires_at && $job->gemini_cache_expires_at->isFuture()) {
// Basic verification: the cache is tied to a specific model
// We assume the stored cache is for the primary model
return $job->gemini_cache_id;
}
$apiKey = env('GEMINI_API_KEY');
try {
// Create Context Cache (TTL of 1 hour)
$response = Http::timeout(30)->post("https://generativelanguage.googleapis.com/v1beta/cachedContents?key=" . $apiKey, [
'model' => "models/{$model}",
'contents' => [
['role' => 'user', 'parts' => [['text' => $staticPrompt]]]
],
'ttl' => '3600s'
]);
if ($response->successful()) {
$cacheId = $response->json('name');
// Persist so the next analysis for this job reuses the cache.
$job->update([
'gemini_cache_id' => $cacheId,
'gemini_cache_expires_at' => now()->addHour()
]);
return $cacheId;
}
// Log the error body to understand why the cache creation was refused.
Log::warning("Gemini Cache Refused: " . $response->body());
} catch (\Exception $e) {
Log::error("Gemini Cache Lifecycle Error: " . $e->getMessage());
}
return null;
}
/**
 * Grab the outermost JSON object embedded in a free-form AI answer.
 *
 * Greedy DOTALL match from the first '{' to the last '}', so any chatter
 * surrounding the JSON is discarded. Returns '{}' when no braces are found.
 */
private function extractJson($string)
{
    $found = preg_match('/\{.*\}/s', $string, $hits);
    return $found ? $hits[0] : '{}';
}
/**
 * Static fallback payload used whenever no provider can deliver a real
 * analysis (missing API key, connection failure, unparseable answer).
 */
private function getSimulatedAnalysis()
{
    $fallback = [
        'match_score' => 75,
        'summary' => "Analyse simulée (IA non connectée ou erreur API). Le candidat peut avoir un profil intéressant mais une vérification manuelle est nécessaire.",
    ];
    $fallback['strengths'] = ["Expérience pertinente", "Bonne présentation"];
    $fallback['gaps'] = ["Compétences spécifiques à confirmer"];
    $fallback['verdict'] = "Favorable";
    return $fallback;
}
}