mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-15 19:22:19 +08:00
Nick: improved map ranking algorithm
This commit is contained in:
parent
79e65f31ef
commit
7acd8d2edb
|
@ -2,6 +2,7 @@ import { Response } from "express";
|
|||
import { v4 as uuidv4 } from "uuid";
|
||||
import {
|
||||
legacyCrawlerOptions,
|
||||
LinkInfo,
|
||||
mapRequestSchema,
|
||||
RequestWithAuth,
|
||||
} from "./types";
|
||||
|
@ -109,6 +110,10 @@ export async function mapController(
|
|||
mapResults = mapResults.slice(0, minumumCutoff);
|
||||
}
|
||||
|
||||
|
||||
|
||||
let linkInfos: LinkInfo[] = [];
|
||||
|
||||
if (mapResults.length > 0) {
|
||||
if (req.body.search) {
|
||||
// Ensure all map results are first, maintaining their order
|
||||
|
@ -117,6 +122,12 @@ export async function mapController(
|
|||
...mapResults.slice(1).map((x) => x.url),
|
||||
...links,
|
||||
];
|
||||
|
||||
linkInfos = [
|
||||
mapResults[0],
|
||||
...mapResults.slice(1),
|
||||
...links.map((x) => ({ url: x })),
|
||||
]
|
||||
} else {
|
||||
mapResults.map((x) => {
|
||||
links.push(x.url);
|
||||
|
@ -128,7 +139,7 @@ export async function mapController(
|
|||
if (req.body.search) {
|
||||
const searchQuery = req.body.search.toLowerCase();
|
||||
|
||||
links = performCosineSimilarity(links, searchQuery);
|
||||
links = performCosineSimilarity(linkInfos, searchQuery);
|
||||
}
|
||||
|
||||
links = links
|
||||
|
|
|
@ -478,3 +478,11 @@ export function legacyDocumentConverter(doc: any): Document {
|
|||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
export interface LinkInfo {
|
||||
url: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
import { Logger } from "./logger";
|
||||
import { LinkInfo } from "../controllers/v1/types";
|
||||
|
||||
export function performCosineSimilarity(links: string[], searchQuery: string) {
|
||||
export function performCosineSimilarity(links: LinkInfo[], searchQuery: string) {
|
||||
try {
|
||||
// Function to calculate cosine similarity
|
||||
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
|
||||
|
@ -27,20 +28,20 @@ export function performCosineSimilarity(links: string[], searchQuery: string) {
|
|||
|
||||
// Calculate similarity scores
|
||||
const similarityScores = links.map((link) => {
|
||||
const linkVector = textToVector(link);
|
||||
const linkText = `${link.url} ${link.title || ''} ${link.description || ''}`.trim();
|
||||
const linkVector = textToVector(linkText);
|
||||
const searchVector = textToVector(searchQuery);
|
||||
return cosineSimilarity(linkVector, searchVector);
|
||||
});
|
||||
|
||||
// Sort links based on similarity scores and print scores
|
||||
const a = links
|
||||
// Sort links based on similarity scores
|
||||
const sortedLinks = links
|
||||
.map((link, index) => ({ link, score: similarityScores[index] }))
|
||||
.sort((a, b) => b.score - a.score);
|
||||
|
||||
links = a.map((item) => item.link);
|
||||
return links;
|
||||
return sortedLinks.map((item) => item.link.url);
|
||||
} catch (error) {
|
||||
Logger.error(`Error performing cosine similarity: ${error}`);
|
||||
return links;
|
||||
return links.map(link => link.url);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user