AI Analysis: clean extracted text to prevent JSON encoding errors
This commit is contained in:
@@ -50,13 +50,28 @@ class AIAnalysisService
|
||||
|
||||
try {
|
||||
$pdf = $this->parser->parseFile(Storage::disk('local')->path($document->file_path));
|
||||
return $pdf->getText();
|
||||
$text = $pdf->getText();
|
||||
return $this->cleanText($text);
|
||||
} catch (\Exception $e) {
|
||||
Log::error("PDF Extraction Error: " . $e->getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Normalise extracted text so it is valid UTF-8 and safe to embed in JSON.
 *
 * Invalid byte sequences are replaced with the mbstring substitute
 * character, then C0/C1 control characters and DEL are stripped. Tabs,
 * line feeds and carriage returns are preserved, as is every printable
 * Unicode character (non-Latin scripts, typographic punctuation, emoji).
 *
 * @param string $text Raw text, possibly containing invalid bytes or control characters.
 *
 * @return string The cleaned text with leading and trailing whitespace trimmed.
 */
protected function cleanText(string $text): string
{
    // Re-encoding UTF-8 to UTF-8 makes mbstring substitute any ill-formed
    // byte sequence, so the /u pattern below always receives valid input.
    $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');

    // Strip control characters only (U+0000-U+001F except TAB/LF/CR, plus
    // U+007F-U+009F). A blacklist is used deliberately: whitelisting the
    // Latin-1 range would silently delete every character above U+00FF.
    $cleaned = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/u', '', $text);

    // preg_replace() returns null on a PCRE error; keep the scrubbed text
    // rather than passing null to trim() and losing the whole document.
    return trim($cleaned ?? $text);
}
|
||||
|
||||
/**
|
||||
* Call the AI API (using a placeholder for now, or direct Http call).
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user