Files
RecruIT/app/Services/AIAnalysisService.php

453 lines
17 KiB
PHP

<?php
namespace App\Services;
use App\Models\Candidate;
use App\Models\Document;
use Smalot\PdfParser\Parser;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
class AIAnalysisService
{
protected $parser;
/**
 * Set up the smalot/pdfparser instance used to read candidate PDFs.
 */
public function __construct()
{
$this->parser = new Parser();
}
/**
 * Run the AI screening of a candidate against the job position they applied to.
 *
 * Loads the candidate's documents and job position, extracts the raw text from
 * the uploaded PDFs, then delegates the actual scoring to the configured
 * provider via callAI().
 *
 * @param Candidate   $candidate the candidate to analyze
 * @param string|null $provider  optional provider override ('openai', 'anthropic', 'gemini', ...)
 * @return array normalized analysis payload
 * @throws \Exception when the candidate has no job position or the CV text cannot be extracted
 */
public function analyze(Candidate $candidate, ?string $provider = null)
{
    if (!$candidate->job_position_id) {
        throw new \Exception("Le candidat n'est associé à aucune fiche de poste.");
    }

    $candidate->load(['documents', 'jobPosition']);

    $resumeText = $this->extractTextFromDocument($candidate->documents->firstWhere('type', 'cv'));
    $coverText  = $this->extractTextFromDocument($candidate->documents->firstWhere('type', 'cover_letter'));

    if (!$resumeText) {
        throw new \Exception("Impossible d'extraire le texte du CV.");
    }

    return $this->callAI($candidate, $resumeText, $coverText, $provider);
}
/**
 * Pull the plain text out of a stored PDF document.
 *
 * Returns null when no document is given, when the file is missing from the
 * local disk, or when the PDF cannot be parsed (the parse error is logged).
 */
protected function extractTextFromDocument(?Document $document): ?string
{
    if ($document === null) {
        return null;
    }

    $disk = Storage::disk('local');
    if (!$disk->exists($document->file_path)) {
        return null;
    }

    try {
        $raw = $this->parser->parseFile($disk->path($document->file_path))->getText();
        return $this->cleanText($raw);
    } catch (\Exception $e) {
        Log::error("PDF Extraction Error: " . $e->getMessage());
        return null;
    }
}
/**
 * Normalize extracted PDF text so it embeds safely in a JSON prompt.
 *
 * Drops invalid UTF-8 byte sequences, strips control characters (newlines,
 * carriage returns and tabs are kept, as are Latin-1 accented letters), and
 * trims surrounding whitespace.
 */
protected function cleanText(string $text): string
{
    // Re-encoding UTF-8 -> UTF-8 discards any invalid byte sequences.
    $utf8 = mb_convert_encoding($text, 'UTF-8', 'UTF-8');

    // Keep printable ASCII, Latin-1 (\xA0-\xFF) and tab/LF/CR; drop the rest.
    $printable = preg_replace('/[^\x20-\x7E\xA0-\xFF\x0A\x0D\x09]/u', '', $utf8);

    return trim($printable);
}
/**
 * Build the full prompt and dispatch the analysis to the selected AI provider.
 *
 * The prompt is assembled in two parts:
 *  - a static part (job description, requirements, analysis instructions or a
 *    job-specific override prompt) identical for every candidate of a job;
 *  - a dynamic part (CV text + cover letter text) that changes per candidate.
 * Gemini receives both parts separately so the static part can go through
 * context caching; the other providers receive the concatenation.
 *
 * @param Candidate   $candidate  candidate being analyzed (jobPosition relation is read)
 * @param string      $cvText     extracted CV text
 * @param string|null $letterText extracted cover-letter text, if any
 * @param string|null $provider   'openai' | 'anthropic' | 'gemini'; any other value
 *                                falls back to Ollama. Defaults to env AI_DEFAULT_PROVIDER.
 * @return array normalized analysis with 'provider' and 'analyzed_at' metadata added
 */
protected function callAI(Candidate $candidate, string $cvText, ?string $letterText, ?string $provider = null)
{
$provider = $provider ?: env('AI_DEFAULT_PROVIDER', 'ollama');
$job = $candidate->jobPosition;
// --- BYPASS LOGIC ---
// The job position may fully replace the base prompt with its own ai_prompt.
if ($job->ai_bypass_base_prompt && !empty($job->ai_prompt)) {
$staticPrompt = $job->ai_prompt;
// We still append the JSON requirement to ensure the frontend doesn't crash,
// unless the user specifically asked for "pure" takeover.
// Most users want to control the "logic" not the "serialization format".
if (!str_contains(strtolower($staticPrompt), 'json')) {
$staticPrompt .= "\n\nRéponds UNIQUEMENT en JSON pur. Format attendu:\n" . config('ai.defaults.json_format');
}
} else {
// --- STANDARD LOGIC ---
// Base instructions from config
$baseInstruction = config('ai.defaults.base_instruction');
$jsonFormat = config('ai.defaults.json_format');
$staticPrompt = "{$baseInstruction} Ton rôle est d'analyser le profil d'un candidat pour le poste de '{$job->title}'.\n\n";
$staticPrompt .= "DESCRIPTION DU POSTE:\n{$job->description}\n\n";
if (!empty($job->requirements)) {
$staticPrompt .= "COMPÉTENCES REQUISES:\n" . implode(", ", $job->requirements) . "\n\n";
}
if (!$job->ai_prompt) {
// Default generalist analysis instructions
$staticPrompt .= "CONSIGNES D'ANALYSE:\n" . config('ai.defaults.analysis_instructions') . "\n\n";
} else {
// Specific instructions from the job position
$staticPrompt .= "CONSIGNES D'ANALYSE SPÉCIFIQUES:\n" . $job->ai_prompt . "\n\n";
}
$staticPrompt .= "FORMAT DE RÉPONSE ATTENDU:\n{$jsonFormat}\n";
}
// Appended in every case (including bypass) to force a parseable response.
$staticPrompt .= "\nRéponds UNIQUEMENT en JSON pur, sans texte avant ou après. Assure-toi que le JSON est valide.";
// Dynamic Part: The candidate data (Not cached)
$dynamicPrompt = "CONTENU DU CV DU CANDIDAT:\n{$cvText}\n\nCONTENU DE LA LETTRE DE MOTIVATION:\n" . ($letterText ?? "Non fournie");
// Full prompt for providers not using context caching
$fullPrompt = $staticPrompt . "\n\n" . $dynamicPrompt;
// Gemini gets static/dynamic separately (context caching); others get the concatenation.
$analysis = match ($provider) {
'openai' => $this->callOpenAI($fullPrompt),
'anthropic' => $this->callAnthropic($fullPrompt),
'gemini' => $this->callGemini($dynamicPrompt, $staticPrompt, $job),
default => $this->callOllama($fullPrompt),
};
// Normalize keys for frontend compatibility
$normalized = $this->normalizeAnalysis($analysis);
// Inject metadata
$normalized['provider'] = $provider;
$normalized['analyzed_at'] = now()->toIso8601String();
return $normalized;
}
/**
 * Map a raw AI payload onto the key set the frontend expects.
 *
 * Handles French / alternative key names (score_global, points_forts, ...),
 * flattens lists of structured objects into plain strings, coerces the match
 * score into an integer capped at 100 (tolerating "18/20", "0,85", "85%"...),
 * and guarantees every expected key exists in the result.
 */
protected function normalizeAnalysis(array $data): array
{
    $result = $data;

    // Alias keys some models emit -> canonical keys. Both sides of the check
    // read the raw payload, so when several aliases for the same target are
    // present the last one in this list wins (matches the original behavior).
    $aliases = [
        'score_global' => 'match_score',
        'score' => 'match_score',
        'points_forts' => 'strengths',
        'points_faibles' => 'gaps',
        'recommandation' => 'verdict',
        'synthese' => 'summary',
    ];
    foreach ($aliases as $source => $target) {
        if (isset($data[$source]) && !isset($data[$target])) {
            $result[$target] = $data[$source];
        }
    }

    // Turn a list that may contain structured objects into a list of strings,
    // preserving the original keys.
    $stringify = static function ($list): array {
        if (!is_array($list)) {
            return [];
        }
        $out = [];
        foreach ($list as $key => $item) {
            if (!is_array($item)) {
                $out[$key] = (string) $item;
                continue;
            }
            $label = $item['type'] ?? $item['title'] ?? $item['category'] ?? null;
            $text  = $item['description'] ?? $item['value'] ?? $item['content'] ?? null;
            if ($label && $text) {
                $out[$key] = "{$label} : {$text}";
            } elseif ($text) {
                $out[$key] = $text;
            } elseif ($label) {
                $out[$key] = $label;
            } else {
                $out[$key] = json_encode($item);
            }
        }
        return $out;
    };

    foreach (['strengths', 'gaps', 'elements_bloquants'] as $listKey) {
        if (isset($result[$listKey])) {
            $result[$listKey] = $stringify($result[$listKey]);
        }
    }

    // Coerce the score into an int, absorbing common AI formatting quirks.
    if (isset($result['match_score'])) {
        $raw = $result['match_score'];
        $value = $raw;
        if (is_string($value)) {
            // "18/20" -> keep the numerator only.
            if (str_contains($value, '/')) {
                $value = explode('/', $value)[0];
            }
            // European decimal comma, then strip everything but digits/dots.
            $value = preg_replace('/[^0-9.]/', '', str_replace(',', '.', $value));
        }
        $number = (float) $value;
        // A ratio such as 0.85 almost certainly means 85%; a literal integer 1
        // may honestly be 1/100, so native ints are never rescaled.
        if ($number > 0 && $number < 1.1 && !is_int($raw)) {
            if ($number < 1 || str_contains((string) $raw, '.')) {
                $number *= 100;
            }
        }
        $result['match_score'] = (int) min(100, round($number));
    }

    // Guarantee every key the frontend reads, even when the AI omitted it.
    $result['match_score'] ??= 0;
    $result['summary'] ??= "Pas de résumé généré.";
    $result['verdict'] ??= "Indéterminé";
    $result['strengths'] ??= [];
    $result['gaps'] ??= [];
    $result['scores_detailles'] ??= null;
    $result['elements_bloquants'] ??= [];
    $result['questions_entretien_suggerees'] ??= [];
    return $result;
}
/**
 * Ask a local Ollama instance for the analysis.
 *
 * Uses the JSON output mode of /api/generate. Falls back to the simulated
 * analysis when the server is unreachable, answers with an error status, or
 * returns a payload that is not decodable JSON.
 *
 * @return array decoded analysis, or the simulated fallback
 */
protected function callOllama(string $prompt)
{
    $ollamaUrl = env('OLLAMA_URL', 'http://localhost:11434/api/generate');
    $ollamaModel = env('OLLAMA_MODEL', 'mistral');
    try {
        $response = Http::timeout(120)->post($ollamaUrl, [
            'model' => $ollamaModel,
            'prompt' => $prompt,
            'stream' => false,
            'format' => 'json'
        ]);
        if ($response->successful()) {
            // Guard against malformed model output: returning null here would
            // make normalizeAnalysis(array $data) throw a TypeError upstream.
            $decoded = json_decode((string) $response->json('response'), true);
            if (is_array($decoded)) {
                return $decoded;
            }
            Log::warning("AI Provider Error (Ollama): response was not valid JSON");
        } else {
            Log::warning("AI Provider Error (Ollama): HTTP " . $response->status() . " - " . $response->body());
        }
    } catch (\Exception $e) {
        Log::error("AI Connection Failed (Ollama): " . $e->getMessage());
    }
    return $this->getSimulatedAnalysis();
}
/**
 * Ask the OpenAI Chat Completions API (gpt-4o, JSON mode) for the analysis.
 *
 * Falls back to the simulated analysis when no API key is configured, on any
 * HTTP/connection error, or when the response content is not decodable JSON
 * (a null return would break normalizeAnalysis(array $data) upstream).
 *
 * @return array decoded analysis, or the simulated fallback
 */
protected function callOpenAI(string $prompt)
{
    $apiKey = env('OPENAI_API_KEY');
    if (!$apiKey) return $this->getSimulatedAnalysis();
    try {
        $response = Http::withToken($apiKey)->timeout(60)->post('https://api.openai.com/v1/chat/completions', [
            'model' => 'gpt-4o',
            'messages' => [['role' => 'user', 'content' => $prompt]],
            'response_format' => ['type' => 'json_object']
        ]);
        if ($response->successful()) {
            $decoded = json_decode((string) $response->json('choices.0.message.content'), true);
            if (is_array($decoded)) {
                return $decoded;
            }
            Log::warning("OpenAI Analysis: response content was not valid JSON");
        } else {
            // Log non-2xx responses for parity with the other providers.
            Log::warning("OpenAI Analysis: HTTP " . $response->status() . " - " . $response->body());
        }
    } catch (\Exception $e) {
        Log::error("OpenAI Analysis Failed: " . $e->getMessage());
    }
    return $this->getSimulatedAnalysis();
}
/**
 * Ask the Anthropic Messages API (Claude 3.5 Sonnet) for the analysis.
 *
 * Claude has no strict JSON mode, so the JSON object is extracted from the
 * free-form answer with extractJson() before decoding. Falls back to the
 * simulated analysis when no API key is configured, on HTTP/connection
 * errors, or when the extracted text is not decodable JSON (a null return
 * would break normalizeAnalysis(array $data) upstream).
 *
 * @return array decoded analysis, or the simulated fallback
 */
protected function callAnthropic(string $prompt)
{
    $apiKey = env('ANTHROPIC_API_KEY');
    if (!$apiKey) return $this->getSimulatedAnalysis();
    try {
        $response = Http::withHeaders([
            'x-api-key' => $apiKey,
            'anthropic-version' => '2023-06-01',
            'content-type' => 'application/json'
        ])->timeout(60)->post('https://api.anthropic.com/v1/messages', [
            'model' => 'claude-3-5-sonnet-20240620',
            'max_tokens' => 2048,
            'messages' => [['role' => 'user', 'content' => $prompt]]
        ]);
        if ($response->successful()) {
            // Cast: content.0.text may be absent (null), and preg_match on
            // null is deprecated since PHP 8.1.
            $content = (string) $response->json('content.0.text');
            $decoded = json_decode($this->extractJson($content), true);
            if (is_array($decoded)) {
                return $decoded;
            }
            Log::warning("Anthropic Analysis: response was not valid JSON");
        } else {
            Log::warning("Anthropic Analysis: HTTP " . $response->status() . " - " . $response->body());
        }
    } catch (\Exception $e) {
        Log::error("Anthropic Analysis Failed: " . $e->getMessage());
    }
    return $this->getSimulatedAnalysis();
}
/**
 * Query the Google Gemini API, trying several models in order until one
 * returns a decodable JSON analysis.
 *
 * When a static prompt and a job are given and the model is served from the
 * v1beta endpoint, the static part goes through Gemini context caching (see
 * getOrCreateContextCache) and only the dynamic part travels with the
 * request; otherwise the static part is prepended to the request text.
 * Falls back to the simulated analysis when every model fails.
 *
 * @param string      $dynamicPrompt candidate-specific prompt part (CV + letter)
 * @param string|null $staticPrompt  job-specific prompt part, cacheable
 * @param \App\Models\JobPosition|null $job job used to persist the cache id
 * @return array decoded analysis, or the simulated fallback
 */
protected function callGemini(string $dynamicPrompt, ?string $staticPrompt = null, ?\App\Models\JobPosition $job = null)
{
$apiKey = env('GEMINI_API_KEY');
if (!$apiKey) return $this->getSimulatedAnalysis();
// Models to try in order (Updated for 2026 models)
$models = [
'gemini-3.1-flash-lite-preview',
'gemini-3-flash-preview',
'gemini-1.5-flash-latest'
];
foreach ($models as $model) {
try {
// Newer model families are only exposed on the v1beta endpoint.
$version = (str_contains($model, '2.0') || str_contains($model, '3.')) ? 'v1beta' : 'v1';
$url = "https://generativelanguage.googleapis.com/{$version}/models/{$model}:generateContent?key=" . $apiKey;
// Low temperature + JSON mime type: deterministic, parseable output.
$generationConfig = [
'temperature' => 0.2,
'responseMimeType' => 'application/json'
];
$payload = [
'generationConfig' => $generationConfig,
'contents' => [
['role' => 'user', 'parts' => [['text' => $dynamicPrompt]]]
]
];
// Attempt to use Context Caching if static prompt and job are provided
if ($staticPrompt && $job && $version === 'v1beta') {
$cacheId = $this->getOrCreateContextCache($job, $staticPrompt, $model);
if ($cacheId) {
$payload['cachedContent'] = $cacheId;
// When using cache, the static part is already in the cache
} else {
// Fallback: prepend static part if cache fails
$payload['contents'][0]['parts'][0]['text'] = $staticPrompt . "\n\n" . $dynamicPrompt;
}
} else if ($staticPrompt) {
// Non-cached fallback
$payload['contents'][0]['parts'][0]['text'] = $staticPrompt . "\n\n" . $dynamicPrompt;
}
$response = Http::timeout(60)->post($url, $payload);
if ($response->successful()) {
$candidate = $response->json('candidates.0');
// A non-STOP finish (MAX_TOKENS, SAFETY, ...) may truncate the answer.
if (isset($candidate['finishReason']) && $candidate['finishReason'] !== 'STOP') {
Log::warning("Gemini warning: Analysis finished with reason " . $candidate['finishReason']);
}
$text = $candidate['content']['parts'][0]['text'] ?? null;
if ($text) {
$json = $this->extractJson($text);
$decoded = json_decode($json, true);
// Only accept a decodable payload; otherwise try the next model.
if ($decoded) return $decoded;
}
} else {
Log::error("Gemini API Error ($model): " . $response->status() . " - " . $response->body());
}
} catch (\Exception $e) {
Log::error("Gemini Connection Failed ($model): " . $e->getMessage());
}
}
return $this->getSimulatedAnalysis();
}
/**
 * Get or create a Gemini Context Cache for a specific Job Position.
 *
 * The cache resource name and its expiry are persisted on the JobPosition
 * model (gemini_cache_id / gemini_cache_expires_at) so later analyses of the
 * same job can reuse it. Returns the cache id, or null when caching is
 * skipped or fails — the caller then sends the full prompt instead.
 */
protected function getOrCreateContextCache(\App\Models\JobPosition $job, string $staticPrompt, string $model)
{
// Only cache large prompts — presumably to stay above Gemini's minimum
// cached-content token count and make caching worthwhile; TODO confirm
// the 120000-character threshold.
if (strlen($staticPrompt) < 120000) {
return null;
}
// Check if we already have a valid cache for this job
if ($job->gemini_cache_id && $job->gemini_cache_expires_at && $job->gemini_cache_expires_at->isFuture()) {
// Basic verification: the cache is tied to a specific model
// We assume the stored cache is for the primary model
return $job->gemini_cache_id;
}
$apiKey = env('GEMINI_API_KEY');
try {
// Create Context Cache (TTL of 1 hour)
$response = Http::timeout(30)->post("https://generativelanguage.googleapis.com/v1beta/cachedContents?key=" . $apiKey, [
'model' => "models/{$model}",
'contents' => [
['role' => 'user', 'parts' => [['text' => $staticPrompt]]]
],
'ttl' => '3600s'
]);
if ($response->successful()) {
$cacheId = $response->json('name');
// Persist so the next analysis for this job reuses the cache.
$job->update([
'gemini_cache_id' => $cacheId,
'gemini_cache_expires_at' => now()->addHour()
]);
return $cacheId;
}
// Log the error body to understand why the cache creation was refused.
Log::warning("Gemini Cache Refused: " . $response->body());
} catch (\Exception $e) {
Log::error("Gemini Cache Lifecycle Error: " . $e->getMessage());
}
return null;
}
/**
 * Grab the outermost JSON object embedded in a free-form AI answer.
 *
 * Greedy DOTALL match from the first '{' to the last '}', so any chatter
 * surrounding the JSON is discarded. Returns '{}' when no braces are found.
 */
private function extractJson($string)
{
    $found = preg_match('/\{.*\}/s', $string, $hits);
    return $found ? $hits[0] : '{}';
}
/**
 * Static fallback payload used whenever no provider can deliver a real
 * analysis (missing API key, connection failure, unparseable answer).
 */
private function getSimulatedAnalysis()
{
    $fallback = [
        'match_score' => 75,
        'summary' => "Analyse simulée (IA non connectée ou erreur API). Le candidat peut avoir un profil intéressant mais une vérification manuelle est nécessaire.",
    ];
    $fallback['strengths'] = ["Expérience pertinente", "Bonne présentation"];
    $fallback['gaps'] = ["Compétences spécifiques à confirmer"];
    $fallback['verdict'] = "Favorable";
    return $fallback;
}
}