mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 03:32:22 +08:00
fix(html-to-markdown): make error reporting less intrusive
This commit is contained in:
parent
bd928b1512
commit
0a1c99074f
|
@ -6,22 +6,28 @@ import * as Sentry from "@sentry/node";
|
||||||
|
|
||||||
import dotenv from 'dotenv';
|
import dotenv from 'dotenv';
|
||||||
import { logger } from './logger';
|
import { logger } from './logger';
|
||||||
|
import { stat } from 'fs/promises';
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
// TODO: add a timeout to the Go parser
|
// TODO: add a timeout to the Go parser
|
||||||
|
const goExecutablePath = join(process.cwd(), 'sharedLibs', 'go-html-to-md', 'html-to-markdown.so');
|
||||||
|
|
||||||
class GoMarkdownConverter {
|
class GoMarkdownConverter {
|
||||||
private static instance: GoMarkdownConverter;
|
private static instance: GoMarkdownConverter;
|
||||||
private convert: any;
|
private convert: any;
|
||||||
|
|
||||||
private constructor() {
|
private constructor() {
|
||||||
const goExecutablePath = join(process.cwd(), 'sharedLibs', 'go-html-to-md', 'html-to-markdown.so');
|
|
||||||
const lib = koffi.load(goExecutablePath);
|
const lib = koffi.load(goExecutablePath);
|
||||||
this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
|
this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static getInstance(): GoMarkdownConverter {
|
public static async getInstance(): Promise<GoMarkdownConverter> {
|
||||||
if (!GoMarkdownConverter.instance) {
|
if (!GoMarkdownConverter.instance) {
|
||||||
|
try {
|
||||||
|
await stat(goExecutablePath);
|
||||||
|
} catch (_) {
|
||||||
|
throw Error("Go shared library not found");
|
||||||
|
}
|
||||||
GoMarkdownConverter.instance = new GoMarkdownConverter();
|
GoMarkdownConverter.instance = new GoMarkdownConverter();
|
||||||
}
|
}
|
||||||
return GoMarkdownConverter.instance;
|
return GoMarkdownConverter.instance;
|
||||||
|
@ -47,7 +53,7 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
|
if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
|
||||||
const converter = GoMarkdownConverter.getInstance();
|
const converter = await GoMarkdownConverter.getInstance();
|
||||||
let markdownContent = await converter.convertHTMLToMarkdown(html);
|
let markdownContent = await converter.convertHTMLToMarkdown(html);
|
||||||
|
|
||||||
markdownContent = processMultiLineLinks(markdownContent);
|
markdownContent = processMultiLineLinks(markdownContent);
|
||||||
|
@ -56,8 +62,12 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
|
||||||
return markdownContent;
|
return markdownContent;
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (!(error instanceof Error) || error.message !== "Go shared library not found") {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
logger.error(`Error converting HTML to Markdown with Go parser: ${error}`);
|
logger.error(`Error converting HTML to Markdown with Go parser: ${error}`);
|
||||||
|
} else {
|
||||||
|
logger.warn("Tried to use Go parser, but it doesn't exist in the file system.", { goExecutablePath });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to TurndownService if Go parser fails or is not enabled
|
// Fallback to TurndownService if Go parser fails or is not enabled
|
||||||
|
@ -89,7 +99,7 @@ export async function parseMarkdown(html: string | null | undefined): Promise<st
|
||||||
|
|
||||||
return markdownContent;
|
return markdownContent;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error converting HTML to Markdown: ", error);
|
logger.error("Error converting HTML to Markdown", {error});
|
||||||
return ""; // Optionally return an empty string or handle the error as needed
|
return ""; // Optionally return an empty string or handle the error as needed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user