fix(scrapeURL/llmExtract): fix schema-less LLM extract

This commit is contained in:
Gergő Móricz 2024-11-11 21:07:37 +01:00
parent 56bebc8107
commit 2ca22659d3
2 changed files with 11 additions and 1 deletion

View File

@@ -53,7 +53,7 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu
/**
 * Zod schema for the `extract` options object.
 *
 * - `mode`: only `"llm"` is accepted; defaults to `"llm"`.
 * - `schema`: optional and typed as `z.any()`, so its shape is not validated here.
 * - `systemPrompt`: string; the default instruction below is used when omitted.
 * - `prompt`: optional string.
 *
 * `.strict(strictMessage)` rejects unrecognized keys and reports `strictMessage`.
 *
 * NOTE(review): this view is a side-by-side diff, so every line shows the
 * pre-commit text followed by the post-commit text. The two sides differ only
 * in the `systemPrompt` default, where the new side adds "in JSON format".
 */
export const extractOptions = z.object({ export const extractOptions = z.object({
mode: z.enum(["llm"]).default("llm"), mode: z.enum(["llm"]).default("llm"),
schema: z.any().optional(), schema: z.any().optional(),
systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required."), systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required."),
prompt: z.string().optional() prompt: z.string().optional()
}).strict(strictMessage); }).strict(strictMessage);

View File

@@ -144,6 +144,16 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt
} }
document.extract = jsonCompletion.choices[0].message.parsed; document.extract = jsonCompletion.choices[0].message.parsed;
if (document.extract === null && jsonCompletion.choices[0].message.content !== null) {
try {
document.extract = JSON.parse(jsonCompletion.choices[0].message.content);
} catch (e) {
logger.error("Failed to parse returned JSON, no schema specified.", { error: e });
throw new LLMRefusalError("Failed to parse returned JSON. Please specify a schema in the extract object.");
}
}
if (options.schema && options.schema.type === "array") { if (options.schema && options.schema.type === "array") {
document.extract = document.extract?.items; document.extract = document.extract?.items;
} }