fix(html-to-markdown): make error reporting less intrusive

This commit is contained in:
Móricz Gergő 2024-11-14 08:58:00 +01:00
parent bd928b1512
commit 0a1c99074f

View File

@ -6,22 +6,28 @@ import * as Sentry from "@sentry/node";
import dotenv from 'dotenv';
import { logger } from './logger';
import { stat } from 'fs/promises';
dotenv.config();
// TODO: add a timeout to the Go parser
const goExecutablePath = join(process.cwd(), 'sharedLibs', 'go-html-to-md', 'html-to-markdown.so');
class GoMarkdownConverter {
private static instance: GoMarkdownConverter;
private convert: any;
private constructor() {
const goExecutablePath = join(process.cwd(), 'sharedLibs', 'go-html-to-md', 'html-to-markdown.so');
const lib = koffi.load(goExecutablePath);
this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
}
public static getInstance(): GoMarkdownConverter {
public static async getInstance(): Promise<GoMarkdownConverter> {
if (!GoMarkdownConverter.instance) {
try {
await stat(goExecutablePath);
} catch (_) {
throw Error("Go shared library not found");
}
GoMarkdownConverter.instance = new GoMarkdownConverter();
}
return GoMarkdownConverter.instance;
@ -47,7 +53,7 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
try {
if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
const converter = GoMarkdownConverter.getInstance();
const converter = await GoMarkdownConverter.getInstance();
let markdownContent = await converter.convertHTMLToMarkdown(html);
markdownContent = processMultiLineLinks(markdownContent);
@ -56,8 +62,12 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
return markdownContent;
}
} catch (error) {
if (!(error instanceof Error) || error.message !== "Go shared library not found") {
Sentry.captureException(error);
logger.error(`Error converting HTML to Markdown with Go parser: ${error}`);
} else {
logger.warn("Tried to use Go parser, but it doesn't exist in the file system.", { goExecutablePath });
}
}
// Fallback to TurndownService if Go parser fails or is not enabled
@ -89,7 +99,7 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
return markdownContent;
} catch (error) {
console.error("Error converting HTML to Markdown: ", error);
logger.error("Error converting HTML to Markdown", {error});
return ""; // Optionally return an empty string or handle the error as needed
}
}