diff --git a/app/Services/AIAnalysisService.php b/app/Services/AIAnalysisService.php
index 012a66d..b1645a4 100644
--- a/app/Services/AIAnalysisService.php
+++ b/app/Services/AIAnalysisService.php
@@ -50,13 +50,37 @@ class AIAnalysisService
 
         try {
             $pdf = $this->parser->parseFile(Storage::disk('local')->path($document->file_path));
-            return $pdf->getText();
+            $text = $pdf->getText();
+            return $this->cleanText($text);
         } catch (\Exception $e) {
             Log::error("PDF Extraction Error: " . $e->getMessage());
             return null;
         }
     }
 
+    /**
+     * Clean extracted text so it is valid UTF-8 and safe to embed in JSON.
+     *
+     * Invalid byte sequences are substituted, control characters other than
+     * tab, line feed and carriage return are removed, and the result is trimmed.
+     *
+     * @param string $text Raw text as returned by the PDF parser.
+     * @return string Cleaned text; may be empty.
+     */
+    protected function cleanText(string $text): string
+    {
+        // Re-encoding UTF-8 to UTF-8 replaces invalid byte sequences with the
+        // mbstring substitute character, so the /u regex below cannot fail on them.
+        $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
+
+        // Strip C0/C1 control characters and DEL, keeping \t, \n and \r. Printable
+        // Unicode above U+00FF (curly quotes, dashes, non-Latin scripts) is preserved.
+        $stripped = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/u', '', $text);
+
+        // preg_replace() returns null on a PCRE error; fall back to the unstripped text.
+        return trim($stripped ?? $text);
+    }
+
     /**
      * Call the AI API (using a placeholder for now, or direct Http call).
      */