fix(html-to-markdown): make error reporting less intrusive

This commit is contained in:
Móricz Gergő 2024-11-14 08:58:00 +01:00
parent bd928b1512
commit 0a1c99074f

View File

@@ -6,22 +6,28 @@ import * as Sentry from "@sentry/node";
import dotenv from 'dotenv'; import dotenv from 'dotenv';
import { logger } from './logger'; import { logger } from './logger';
import { stat } from 'fs/promises';
dotenv.config(); dotenv.config();
// TODO: add a timeout to the Go parser // TODO: add a timeout to the Go parser
const goExecutablePath = join(process.cwd(), 'sharedLibs', 'go-html-to-md', 'html-to-markdown.so');
class GoMarkdownConverter { class GoMarkdownConverter {
private static instance: GoMarkdownConverter; private static instance: GoMarkdownConverter;
private convert: any; private convert: any;
private constructor() { private constructor() {
const goExecutablePath = join(process.cwd(), 'sharedLibs', 'go-html-to-md', 'html-to-markdown.so');
const lib = koffi.load(goExecutablePath); const lib = koffi.load(goExecutablePath);
this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']); this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
} }
public static getInstance(): GoMarkdownConverter { public static async getInstance(): Promise<GoMarkdownConverter> {
if (!GoMarkdownConverter.instance) { if (!GoMarkdownConverter.instance) {
try {
await stat(goExecutablePath);
} catch (_) {
throw Error("Go shared library not found");
}
GoMarkdownConverter.instance = new GoMarkdownConverter(); GoMarkdownConverter.instance = new GoMarkdownConverter();
} }
return GoMarkdownConverter.instance; return GoMarkdownConverter.instance;
@@ -47,7 +53,7 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
try { try {
if (process.env.USE_GO_MARKDOWN_PARSER == "true") { if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
const converter = GoMarkdownConverter.getInstance(); const converter = await GoMarkdownConverter.getInstance();
let markdownContent = await converter.convertHTMLToMarkdown(html); let markdownContent = await converter.convertHTMLToMarkdown(html);
markdownContent = processMultiLineLinks(markdownContent); markdownContent = processMultiLineLinks(markdownContent);
@@ -56,8 +62,12 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
return markdownContent; return markdownContent;
} }
} catch (error) { } catch (error) {
if (!(error instanceof Error) || error.message !== "Go shared library not found") {
Sentry.captureException(error); Sentry.captureException(error);
logger.error(`Error converting HTML to Markdown with Go parser: ${error}`); logger.error(`Error converting HTML to Markdown with Go parser: ${error}`);
} else {
logger.warn("Tried to use Go parser, but it doesn't exist in the file system.", { goExecutablePath });
}
} }
// Fallback to TurndownService if Go parser fails or is not enabled // Fallback to TurndownService if Go parser fails or is not enabled
@@ -89,7 +99,7 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
return markdownContent; return markdownContent;
} catch (error) { } catch (error) {
console.error("Error converting HTML to Markdown: ", error); logger.error("Error converting HTML to Markdown", {error});
return ""; // Optionally return an empty string or handle the error as needed return ""; // Optionally return an empty string or handle the error as needed
} }
} }