AI Analysis: clean extracted text to prevent JSON encoding errors

This commit is contained in:
jeremy bayse
2026-03-22 23:20:42 +01:00
parent e6df75c1ff
commit 949423b1ae

View File

@@ -50,13 +50,28 @@ class AIAnalysisService
try {
$pdf = $this->parser->parseFile(Storage::disk('local')->path($document->file_path));
return $pdf->getText();
$text = $pdf->getText();
return $this->cleanText($text);
} catch (\Exception $e) {
Log::error("PDF Extraction Error: " . $e->getMessage());
return null;
}
}
/**
 * Clean extracted text so it is valid UTF-8 and safe to embed in JSON.
 *
 * Invalid byte sequences are replaced by mbstring's substitute character,
 * control characters other than tab, line feed and carriage return are
 * removed, and surrounding whitespace is trimmed. All other Unicode text
 * (accents, currency signs, typographic quotes, non-Latin scripts) is kept.
 *
 * @param string $text Raw text, possibly containing invalid bytes.
 * @return string Valid UTF-8 text without stray control characters.
 */
protected function cleanText(string $text): string
{
    // Round-tripping UTF-8 -> UTF-8 makes mbstring substitute ("?" by default)
    // every invalid byte sequence, so the /u pattern below receives valid UTF-8.
    $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');

    // Strip only the C0 controls (except \t, \n, \r), DEL and the C1 controls.
    // An ASCII/Latin-1 allow-list would also delete legitimate text such as
    // "€", curly quotes, dashes or any non-Latin script.
    $stripped = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x{9F}]/u', '', $text);

    // preg_replace() returns null on a PCRE error; fall back to the scrubbed
    // text instead of handing null to trim().
    return trim($stripped ?? $text);
}
/**
* Call the AI API (using a placeholder for now, or direct Http call).
*/