Added Anthropic vision support to the getImageDescription function

rafaelsideguide 2024-04-16 18:03:48 -03:00
parent 3e4064bce2
commit 00941d94a4
6 changed files with 122 additions and 43 deletions

View File

@@ -7,6 +7,7 @@ SUPABASE_SERVICE_TOKEN=
REDIS_URL=
SCRAPING_BEE_API_KEY=
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
BULL_AUTH_KEY=
LOGTAIL_KEY=
PLAYWRIGHT_MICROSERVICE_URL=

View File

@@ -39,6 +39,7 @@
    "typescript": "^5.4.2"
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.20.5",
    "@brillout/import": "^0.2.2",
    "@bull-board/api": "^5.14.2",
    "@bull-board/express": "^5.8.0",

View File

@@ -5,6 +5,9 @@ settings:
  excludeLinksFromLockfile: false

dependencies:
  '@anthropic-ai/sdk':
    specifier: ^0.20.5
    version: 0.20.5
  '@brillout/import':
    specifier: ^0.2.2
    version: 0.2.3
@@ -213,6 +216,21 @@ packages:
      '@jridgewell/trace-mapping': 0.3.25
    dev: true

  /@anthropic-ai/sdk@0.20.5:
    resolution: {integrity: sha512-d0ch+zp6/gHR4+2wqWV7JU1EJ7PpHc3r3F6hebovJTouY+pkaId1FuYYaVsG3l/gyqhOZUwKCMSMqcFNf+ZmWg==}
    dependencies:
      '@types/node': 18.19.22
      '@types/node-fetch': 2.6.11
      abort-controller: 3.0.0
      agentkeepalive: 4.5.0
      form-data-encoder: 1.7.2
      formdata-node: 4.4.1
      node-fetch: 2.7.0
      web-streams-polyfill: 3.3.3
    transitivePeerDependencies:
      - encoding
    dev: false

  /@anthropic-ai/sdk@0.9.1:
    resolution: {integrity: sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==}
    dependencies:

View File

@@ -4,7 +4,7 @@ import { scrapSingleUrl } from "./single_url";
import { SitemapEntry, fetchSitemapData, getLinksFromSitemap } from "./sitemap";
import { WebCrawler } from "./crawler";
import { getValue, setValue } from "../../services/redis";
import { getImageDescription } from "./utils/gptVision";
import { getImageDescription } from "./utils/imageDescription";

export type WebScraperOptions = {
  urls: string[];
@@ -16,6 +16,7 @@ export type WebScraperOptions = {
    maxCrawledLinks?: number;
    limit?: number;
    generateImgAltText?: boolean;
    generateImgAltTextModel?: "gpt-4-turbo" | "anthropic";
  };
  concurrentRequests?: number;
};
@@ -29,6 +30,7 @@ export class WebScraperDataProvider {
  private limit: number = 10000;
  private concurrentRequests: number = 20;
  private generateImgAltText: boolean = false;
  private generateImgAltTextModel: "gpt-4-turbo" | "anthropic" = "gpt-4-turbo";

  authorize(): void {
    throw new Error("Method not implemented.");
@@ -312,7 +314,7 @@ export class WebScraperDataProvider {
          let backText = document.content.substring(imageIndex + image.length, Math.min(imageIndex + image.length + 1000, contentLength));
          let frontTextStartIndex = Math.max(imageIndex - 1000, 0);
          let frontText = document.content.substring(frontTextStartIndex, imageIndex);
          altText = await getImageDescription(newImageUrl, backText, frontText);
          altText = await getImageDescription(newImageUrl, backText, frontText, this.generateImgAltTextModel);
        }
        document.content = document.content.replace(image, `![${altText}](${newImageUrl})`);

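For context, a minimal sketch of how the new generateImgAltTextModel option might be passed when configuring the scraper. This is an illustration only: the name of the nested options object is not visible in the hunk above, so crawlerOptions below is an assumption, and the URL is a placeholder.

// Hypothetical configuration sketch -- "crawlerOptions" is an assumed key name;
// only generateImgAltText and generateImgAltTextModel appear in this diff.
const scraperOptions = {
  urls: ["https://example.com/docs"],
  crawlerOptions: {
    generateImgAltText: true,
    generateImgAltTextModel: "anthropic", // new option; defaults to "gpt-4-turbo"
  },
};
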
View File

@@ -1,41 +0,0 @@
export async function getImageDescription(
  imageUrl: string,
  backText: string,
  frontText: string
): Promise<string> {
  const { OpenAI } = require("openai");
  const openai = new OpenAI();

  try {
    const response = await openai.chat.completions.create({
      model: "gpt-4-turbo",
      messages: [
        {
          role: "user",
          content: [
            {
              type: "text",
              text:
                "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
                backText +
                " and the following text: " +
                frontText +
                ". Be super concise.",
            },
            {
              type: "image_url",
              image_url: {
                url: imageUrl,
              },
            },
          ],
        },
      ],
    });

    return response.choices[0].message.content;
  } catch (error) {
    console.error("Error generating image alt text:", error?.message);
    return "";
  }
}

View File

@@ -0,0 +1,98 @@
import Anthropic from '@anthropic-ai/sdk';
import axios from 'axios';

export async function getImageDescription(
  imageUrl: string,
  backText: string,
  frontText: string,
  model: string = "gpt-4-turbo"
): Promise<string> {
  try {
    const prompt = "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
      backText +
      " and the following text: " +
      frontText +
      ". Be super concise."

    switch (model) {
      case 'anthropic': {
        if (!process.env.ANTHROPIC_API_KEY) {
          throw new Error("No Anthropic API key provided");
        }

        const imageRequest = await axios.get(imageUrl, { responseType: 'arraybuffer' });
        const imageMediaType = 'image/png';
        const imageData = Buffer.from(imageRequest.data, 'binary').toString('base64');

        const anthropic = new Anthropic();
        const response = await anthropic.messages.create({
          model: "claude-3-opus-20240229",
          max_tokens: 1024,
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "image",
                  source: {
                    type: "base64",
                    media_type: imageMediaType,
                    data: imageData,
                  },
                },
                {
                  type: "text",
                  text: prompt
                }
              ],
            }
          ]
        });

        return response.content[0].text;

        // const response = await anthropic.messages.create({
        //   messages: [
        //     {
        //       role: "user",
        //       content: prompt,
        //     },
        //   ],
        // });
      }
      default: {
        if (!process.env.OPENAI_API_KEY) {
          throw new Error("No OpenAI API key provided");
        }

        const { OpenAI } = require("openai");
        const openai = new OpenAI();
        const response = await openai.chat.completions.create({
          model: "gpt-4-turbo",
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "text",
                  text: prompt,
                },
                {
                  type: "image_url",
                  image_url: {
                    url: imageUrl,
                  },
                },
              ],
            },
          ],
        });

        return response.choices[0].message.content;
      }
    }
  } catch (error) {
    console.error("Error generating image alt text:", error?.message);
    return "";
  }
}
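
For reference, a minimal usage sketch of the new helper, assuming ANTHROPIC_API_KEY is set in the environment and the function is imported from ./utils/imageDescription as in the index.ts change above. The URL and the surrounding-text strings are placeholders.

import { getImageDescription } from "./utils/imageDescription";

async function example() {
  // Placeholder inputs -- in the scraper, backText/frontText are the ~1000
  // characters of page content after and before the image.
  const altText = await getImageDescription(
    "https://example.com/diagram.png",        // imageUrl (placeholder)
    "Text that appears after the image...",   // backText
    "Text that appears before the image...",  // frontText
    "anthropic"                               // any other value falls through to the default gpt-4-turbo path
  );
  console.log(altText); // concise alt text, or "" if the request failed
}

Note that the Anthropic path hardcodes media_type: 'image/png' for the base64-encoded image it fetches, so non-PNG images are sent with a mismatched media type, and both paths return an empty string on error rather than throwing.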