Merge pull request #798 from mendableai/nsc/improved-map-search

Improved /map ranking algorithm for search queries
This commit is contained in:
Nicolas 2024-10-21 12:22:19 -03:00 committed by GitHub
commit 209bbd1346
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 28 additions and 8 deletions

View File

@ -2,6 +2,7 @@ import { Response } from "express";
import { v4 as uuidv4 } from "uuid";
import {
legacyCrawlerOptions,
LinkInfo,
mapRequestSchema,
RequestWithAuth,
} from "./types";
@ -109,6 +110,10 @@ export async function mapController(
mapResults = mapResults.slice(0, minumumCutoff);
}
let linkInfos: LinkInfo[] = [];
if (mapResults.length > 0) {
if (req.body.search) {
// Ensure all map results are first, maintaining their order
@ -117,6 +122,12 @@ export async function mapController(
...mapResults.slice(1).map((x) => x.url),
...links,
];
linkInfos = [
mapResults[0],
...mapResults.slice(1),
...links.map((x) => ({ url: x })),
]
} else {
mapResults.map((x) => {
links.push(x.url);
@ -128,7 +139,7 @@ export async function mapController(
if (req.body.search) {
const searchQuery = req.body.search.toLowerCase();
links = performCosineSimilarity(links, searchQuery);
links = performCosineSimilarity(linkInfos, searchQuery);
}
links = links

View File

@ -478,3 +478,11 @@ export function legacyDocumentConverter(doc: any): Document {
},
};
}
/**
 * A link paired with optional descriptive metadata.
 *
 * Only `url` is required. `title` and `description` may be omitted; when they
 * are present, `performCosineSimilarity` appends them to the URL to build the
 * text that is compared against the search query.
 */
export interface LinkInfo {
// The link's URL (always present).
url: string;
// Optional title associated with the link.
title?: string;
// Optional description associated with the link.
description?: string;
}

View File

@ -1,6 +1,7 @@
import { Logger } from "./logger";
import { LinkInfo } from "../controllers/v1/types";
export function performCosineSimilarity(links: string[], searchQuery: string) {
export function performCosineSimilarity(links: LinkInfo[], searchQuery: string) {
try {
// Function to calculate cosine similarity
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
@ -27,20 +28,20 @@ export function performCosineSimilarity(links: string[], searchQuery: string) {
// Calculate similarity scores
const similarityScores = links.map((link) => {
const linkVector = textToVector(link);
const linkText = `${link.url} ${link.title || ''} ${link.description || ''}`.trim();
const linkVector = textToVector(linkText);
const searchVector = textToVector(searchQuery);
return cosineSimilarity(linkVector, searchVector);
});
// Sort links based on similarity scores and print scores
const a = links
// Sort links based on similarity scores
const sortedLinks = links
.map((link, index) => ({ link, score: similarityScores[index] }))
.sort((a, b) => b.score - a.score);
links = a.map((item) => item.link);
return links;
return sortedLinks.map((item) => item.link.url);
} catch (error) {
Logger.error(`Error performing cosine similarity: ${error}`);
return links;
return links.map(link => link.url);
}
}