Added Anthropic vision support to the getImageDescription function

rafaelsideguide 2024-04-16 18:03:48 -03:00
parent 3e4064bce2
commit 00941d94a4
6 changed files with 122 additions and 43 deletions

View File

@@ -7,6 +7,7 @@ SUPABASE_SERVICE_TOKEN=
REDIS_URL=
SCRAPING_BEE_API_KEY=
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
BULL_AUTH_KEY=
LOGTAIL_KEY=
PLAYWRIGHT_MICROSERVICE_URL=

View File

@@ -39,6 +39,7 @@
    "typescript": "^5.4.2"
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.20.5",
    "@brillout/import": "^0.2.2",
    "@bull-board/api": "^5.14.2",
    "@bull-board/express": "^5.8.0",

View File

@@ -5,6 +5,9 @@ settings:
  excludeLinksFromLockfile: false

dependencies:
  '@anthropic-ai/sdk':
    specifier: ^0.20.5
    version: 0.20.5
  '@brillout/import':
    specifier: ^0.2.2
    version: 0.2.3
@@ -213,6 +216,21 @@ packages:
      '@jridgewell/trace-mapping': 0.3.25
    dev: true

  /@anthropic-ai/sdk@0.20.5:
    resolution: {integrity: sha512-d0ch+zp6/gHR4+2wqWV7JU1EJ7PpHc3r3F6hebovJTouY+pkaId1FuYYaVsG3l/gyqhOZUwKCMSMqcFNf+ZmWg==}
    dependencies:
      '@types/node': 18.19.22
      '@types/node-fetch': 2.6.11
      abort-controller: 3.0.0
      agentkeepalive: 4.5.0
      form-data-encoder: 1.7.2
      formdata-node: 4.4.1
      node-fetch: 2.7.0
      web-streams-polyfill: 3.3.3
    transitivePeerDependencies:
      - encoding
    dev: false

  /@anthropic-ai/sdk@0.9.1:
    resolution: {integrity: sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==}
    dependencies:

View File

@@ -4,7 +4,7 @@ import { scrapSingleUrl } from "./single_url";
import { SitemapEntry, fetchSitemapData, getLinksFromSitemap } from "./sitemap";
import { WebCrawler } from "./crawler";
import { getValue, setValue } from "../../services/redis";
import { getImageDescription } from "./utils/gptVision";
import { getImageDescription } from "./utils/imageDescription";

export type WebScraperOptions = {
  urls: string[];
@@ -16,6 +16,7 @@ export type WebScraperOptions = {
    maxCrawledLinks?: number;
    limit?: number;
    generateImgAltText?: boolean;
    generateImgAltTextModel?: "gpt-4-turbo" | "anthropic";
  };
  concurrentRequests?: number;
};
@@ -29,6 +30,7 @@ export class WebScraperDataProvider {
  private limit: number = 10000;
  private concurrentRequests: number = 20;
  private generateImgAltText: boolean = false;
  private generateImgAltTextModel: "gpt-4-turbo" | "anthropic" = "gpt-4-turbo";

  authorize(): void {
    throw new Error("Method not implemented.");
@@ -312,7 +314,7 @@ export class WebScraperDataProvider {
          let backText = document.content.substring(imageIndex + image.length, Math.min(imageIndex + image.length + 1000, contentLength));
          let frontTextStartIndex = Math.max(imageIndex - 1000, 0);
          let frontText = document.content.substring(frontTextStartIndex, imageIndex);
          altText = await getImageDescription(newImageUrl, backText, frontText);
          altText = await getImageDescription(newImageUrl, backText, frontText, this.generateImgAltTextModel);
        }
        document.content = document.content.replace(image, `![${altText}](${newImageUrl})`);

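For context, a minimal sketch of how the new generateImgAltTextModel option might be passed when configuring the scraper. This is an illustration only: the name of the nested options object is not visible in the hunk above, so crawlerOptions below is an assumption, and the URL is a placeholder.

// Hypothetical configuration sketch -- "crawlerOptions" is an assumed key name;
// only generateImgAltText and generateImgAltTextModel appear in this diff.
const scraperOptions = {
  urls: ["https://example.com/docs"],
  crawlerOptions: {
    generateImgAltText: true,
    generateImgAltTextModel: "anthropic", // new option; defaults to "gpt-4-turbo"
  },
};
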
View File

@@ -1,41 +0,0 @@
export async function getImageDescription(
  imageUrl: string,
  backText: string,
  frontText: string
): Promise<string> {
  const { OpenAI } = require("openai");
  const openai = new OpenAI();

  try {
    const response = await openai.chat.completions.create({
      model: "gpt-4-turbo",
      messages: [
        {
          role: "user",
          content: [
            {
              type: "text",
              text:
                "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
                backText +
                " and the following text: " +
                frontText +
                ". Be super concise.",
            },
            {
              type: "image_url",
              image_url: {
                url: imageUrl,
              },
            },
          ],
        },
      ],
    });

    return response.choices[0].message.content;
  } catch (error) {
    console.error("Error generating image alt text:", error?.message);
    return "";
  }
}

View File

@@ -0,0 +1,98 @@
import Anthropic from '@anthropic-ai/sdk';
import axios from 'axios';

export async function getImageDescription(
  imageUrl: string,
  backText: string,
  frontText: string,
  model: string = "gpt-4-turbo"
): Promise<string> {
  try {
    const prompt = "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
      backText +
      " and the following text: " +
      frontText +
      ". Be super concise."

    switch (model) {
      case 'anthropic': {
        if (!process.env.ANTHROPIC_API_KEY) {
          throw new Error("No Anthropic API key provided");
        }

        const imageRequest = await axios.get(imageUrl, { responseType: 'arraybuffer' });
        const imageMediaType = 'image/png';
        const imageData = Buffer.from(imageRequest.data, 'binary').toString('base64');

        const anthropic = new Anthropic();
        const response = await anthropic.messages.create({
          model: "claude-3-opus-20240229",
          max_tokens: 1024,
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "image",
                  source: {
                    type: "base64",
                    media_type: imageMediaType,
                    data: imageData,
                  },
                },
                {
                  type: "text",
                  text: prompt
                }
              ],
            }
          ]
        });

        return response.content[0].text;

        // const response = await anthropic.messages.create({
        //   messages: [
        //     {
        //       role: "user",
        //       content: prompt,
        //     },
        //   ],
        // });
      }
      default: {
        if (!process.env.OPENAI_API_KEY) {
          throw new Error("No OpenAI API key provided");
        }

        const { OpenAI } = require("openai");
        const openai = new OpenAI();
        const response = await openai.chat.completions.create({
          model: "gpt-4-turbo",
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "text",
                  text: prompt,
                },
                {
                  type: "image_url",
                  image_url: {
                    url: imageUrl,
                  },
                },
              ],
            },
          ],
        });

        return response.choices[0].message.content;
      }
    }
  } catch (error) {
    console.error("Error generating image alt text:", error?.message);
    return "";
  }
}
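
For reference, a minimal usage sketch of the new helper, assuming ANTHROPIC_API_KEY is set in the environment and the function is imported from ./utils/imageDescription as in the index.ts change above. The URL and the surrounding-text strings are placeholders.

import { getImageDescription } from "./utils/imageDescription";

async function example() {
  // Placeholder inputs -- in the scraper, backText/frontText are the ~1000
  // characters of page content after and before the image.
  const altText = await getImageDescription(
    "https://example.com/diagram.png",        // imageUrl (placeholder)
    "Text that appears after the image...",   // backText
    "Text that appears before the image...",  // frontText
    "anthropic"                               // any other value falls through to the default gpt-4-turbo path
  );
  console.log(altText); // concise alt text, or "" if the request failed
}

Note that the Anthropic path hardcodes media_type: 'image/png' for the base64-encoded image it fetches, so non-PNG images are sent with a mismatched media type, and both paths return an empty string on error rather than throwing.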