diff --git a/apps/ui/ingestion-ui/README.md b/apps/ui/ingestion-ui/README.md index e6b49b95..61f9f983 100644 --- a/apps/ui/ingestion-ui/README.md +++ b/apps/ui/ingestion-ui/README.md @@ -20,7 +20,7 @@ This template provides an easy way to spin up a UI for Firecrawl using React. It ``` 2. Set up your Firecrawl API key: - Open `src/components/FirecrawlComponent.tsx` and replace the placeholder API key: + Open `src/components/ingestion.tsx` and replace the placeholder API key: ```typescript const FIRECRAWL_API_KEY = "your-api-key-here"; @@ -36,7 +36,7 @@ This template provides an easy way to spin up a UI for Firecrawl using React. It ## Customization -The main Firecrawl component is located in `src/components/FirecrawlComponent.tsx`. You can modify this file to customize the UI or add additional features. +The main Firecrawl component is located in `src/components/ingestion.tsx`. You can modify this file to customize the UI or add additional features. ## Security Considerations diff --git a/apps/ui/ingestion-ui/package-lock.json b/apps/ui/ingestion-ui/package-lock.json index 7038a1f2..e48e99b8 100644 --- a/apps/ui/ingestion-ui/package-lock.json +++ b/apps/ui/ingestion-ui/package-lock.json @@ -11,6 +11,7 @@ "@radix-ui/react-checkbox": "^1.1.1", "@radix-ui/react-collapsible": "^1.1.0", "@radix-ui/react-label": "^2.1.0", + "@radix-ui/react-radio-group": "^1.2.0", "@radix-ui/react-slot": "^1.1.0", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", @@ -1192,6 +1193,32 @@ } } }, + "node_modules/@radix-ui/react-collection": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.0.tgz", + "integrity": "sha512-GZsZslMJEyo1VKm5L1ZJY8tGDxZNPAoUeQUIbKeJfoi7Q4kmig5AsgLMYYuyYbfjd8fBmFORAIwYAkXMnXZgZw==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-slot": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-compose-refs": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.0.tgz", @@ -1220,6 +1247,21 @@ } } }, + "node_modules/@radix-ui/react-direction": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.0.tgz", + "integrity": "sha512-BUuBvgThEiAXh2DWu93XsT+a3aWrGqolGlqqw5VU1kG7p/ZH2cuDlM1sRLNnY3QcBS69UIz2mcKhMxDsdewhjg==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-id": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.0.tgz", @@ -1304,6 +1346,69 @@ } } }, + "node_modules/@radix-ui/react-radio-group": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.2.0.tgz", + "integrity": "sha512-yv+oiLaicYMBpqgfpSPw6q+RyXlLdIpQWDHZbUKURxe+nEh53hFXPPlfhfQQtYkS5MMK/5IWIa76SksleQZSzw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.0", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-presence": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-roving-focus": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0", + "@radix-ui/react-use-previous": "1.1.0", + "@radix-ui/react-use-size": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-roving-focus": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz", + "integrity": "sha512-EA6AMGeq9AEeQDeSH0aZgG198qkfHSbvWTf1HvoDmOB5bBG/qTxjYMWUKMnYiV6J/iP/J8MEFSuB2zRU2n7ODA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-collection": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.0", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-slot": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.0.tgz", diff --git a/apps/ui/ingestion-ui/package.json b/apps/ui/ingestion-ui/package.json index 48009648..01a754b2 100644 --- a/apps/ui/ingestion-ui/package.json +++ b/apps/ui/ingestion-ui/package.json @@ -13,6 +13,7 @@ "@radix-ui/react-checkbox": "^1.1.1", "@radix-ui/react-collapsible": "^1.1.0", "@radix-ui/react-label": "^2.1.0", + "@radix-ui/react-radio-group": "^1.2.0", "@radix-ui/react-slot": "^1.1.0", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", diff --git a/apps/ui/ingestion-ui/src/App.tsx b/apps/ui/ingestion-ui/src/App.tsx index eb0e6954..b80a5ad8 100644 --- a/apps/ui/ingestion-ui/src/App.tsx +++ b/apps/ui/ingestion-ui/src/App.tsx @@ -1,9 +1,35 @@ +import { useState } from "react"; import FirecrawlComponent from "./components/ingestion"; +import FirecrawlComponentV1 from "./components/ingestionV1"; +import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group"; +import { Label } from "@/components/ui/label"; function App() { + const [selectedComponent, setSelectedComponent] = useState<"v0" | "v1">("v1"); + return ( <> - +
+ setSelectedComponent(value as "v0" | "v1")} + className="flex space-x-6 mt-6" + > +
+ + +
+
+ + +
+
+
+ {selectedComponent === "v1" ? ( + + ) : ( + + )} ); } diff --git a/apps/ui/ingestion-ui/src/components/ingestionV1.tsx b/apps/ui/ingestion-ui/src/components/ingestionV1.tsx new file mode 100644 index 00000000..b34c0d6b --- /dev/null +++ b/apps/ui/ingestion-ui/src/components/ingestionV1.tsx @@ -0,0 +1,603 @@ +import { useState, ChangeEvent, FormEvent, useEffect } from "react"; +import { + Card, + CardHeader, + CardTitle, + CardContent, + CardFooter, +} from "@/components/ui/card"; +import { Input } from "@/components/ui/input"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Label } from "@/components/ui/label"; +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from "@/components/ui/collapsible"; +import { ChevronDown, ChevronLeft, ChevronRight } from "lucide-react"; + +//! Hardcoded values (not recommended for production) +//! Highly recommended to move all Firecrawl API calls to the backend (e.g. Next.js API route) +const FIRECRAWL_API_URL = "https://api.firecrawl.dev"; // Replace with your actual API URL whether it is local or using Firecrawl Cloud +const FIRECRAWL_API_KEY = "fc-YOUR_API_KEY"; // Replace with your actual API key + +interface FormData { + url: string; + crawlSubPages: boolean; + search: string; + limit: string; + maxDepth: string; + excludePaths: string; + includePaths: string; + extractMainContent: boolean; +} + +interface CrawlerOptions { + includes?: string[]; + excludes?: string[]; + maxDepth?: number; + limit?: number; + returnOnlyUrls: boolean; +} + +interface ScrapeOptions { + formats?: string[]; + onlyMainContent?: boolean; +} + +interface PageOptions { + onlyMainContent: boolean; +} + +interface RequestBody { + url: string; + crawlerOptions?: CrawlerOptions; + pageOptions?: PageOptions; + search?: string; + excludePaths?: string[]; + includePaths?: string[]; + maxDepth?: number; + limit?: number; + scrapeOptions?: ScrapeOptions; + formats?: string[]; +} + +interface ScrapeResultMetadata { + title: string; + description: string; + language: string; + sourceURL: string; + pageStatusCode: number; + pageError?: string; + [key: string]: string | number | undefined; +} + +interface ScrapeResultData { + markdown: string; + content: string; + html: string; + rawHtml: string; + metadata: ScrapeResultMetadata; + llm_extraction: Record; + warning?: string; +} + +interface ScrapeResult { + success: boolean; + data: ScrapeResultData; +} + +export default function FirecrawlComponentV1() { + const [formData, setFormData] = useState({ + url: "", + crawlSubPages: false, + search: "", + limit: "", + maxDepth: "", + excludePaths: "", + includePaths: "", + extractMainContent: false, + }); + const [loading, setLoading] = useState(false); + const [scrapingSelectedLoading, setScrapingSelectedLoading] = + useState(false); + const [crawledUrls, setCrawledUrls] = useState([]); + const [selectedUrls, setSelectedUrls] = useState([]); + const [scrapeResults, setScrapeResults] = useState< + Record + >({}); + const [isCollapsibleOpen, setIsCollapsibleOpen] = useState(true); + const [crawlStatus, setCrawlStatus] = useState<{ + current: number; + total: number | null; + }>({ current: 0, total: null }); + const [elapsedTime, setElapsedTime] = useState(0); + const [showCrawlStatus, setShowCrawlStatus] = useState(false); + const [isScraping, setIsScraping] = useState(false); + const [currentPage, setCurrentPage] = useState(1); + const urlsPerPage = 10; + + useEffect(() => { + let timer: NodeJS.Timeout; + if (loading) { + setShowCrawlStatus(true); + timer = setInterval(() => { + setElapsedTime((prevTime) => prevTime + 1); + }, 1000); + } + return () => { + if (timer) clearInterval(timer); + }; + }, [loading]); + + const handleChange = (e: ChangeEvent) => { + const { name, value, type, checked } = e.target; + setFormData((prevData) => { + const newData = { + ...prevData, + [name]: type === "checkbox" ? checked : value, + }; + + // Automatically check "Crawl Sub-pages" if limit or search have content + if (name === "limit" || name === "search") { + newData.crawlSubPages = !!value || !!newData.limit || !!newData.search; + } + + return newData; + }); + }; + + const handleSubmit = async (e: FormEvent) => { + e.preventDefault(); + setLoading(true); + setIsCollapsibleOpen(false); + setElapsedTime(0); + setCrawlStatus({ current: 0, total: null }); + setIsScraping(!formData.crawlSubPages); + setCrawledUrls([]); + setSelectedUrls([]); + setScrapeResults({}); + setScrapingSelectedLoading(false); + setShowCrawlStatus(false); + + try { + const endpoint = `${FIRECRAWL_API_URL}/v1/${ + formData.crawlSubPages ? "map" : "scrape" + }`; + + const requestBody: RequestBody = formData.crawlSubPages + ? { + url: formData.url, + search: formData.search || undefined, + limit: formData.limit ? parseInt(formData.limit) : undefined, + } + : { + url: formData.url, + formats: ["markdown"], + }; + + const response = await fetch(endpoint, { + method: "POST", + headers: { + Authorization: `Bearer ${FIRECRAWL_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + const data = await response.json(); + if (formData.crawlSubPages) { + if (data.success === true && Array.isArray(data.links)) { + setCrawledUrls(data.links); + setSelectedUrls(data.links); + setCrawlStatus({ + current: data.links.length, + total: data.links.length, + }); + + // Set scrape results with the links + const linkResults: Record = {}; + data.links.forEach((link: string) => { + linkResults[link] = { + success: true, + data: { + metadata: { + sourceURL: link, + title: "", + description: "", + language: "", + pageStatusCode: 200, + }, + markdown: "", + content: "", + html: "", + rawHtml: "", + llm_extraction: {}, + }, + }; + }); + } else { + console.error("Unexpected response format from map endpoint"); + console.log(data); + } + } else { + setScrapeResults({ [formData.url]: data }); + setCrawlStatus({ current: 1, total: 1 }); + } + } catch (error) { + console.error("Error:", error); + setScrapeResults({ + error: { + success: false, + data: { + metadata: { + pageError: "Error occurred while fetching data", + title: "", + description: "", + language: "", + sourceURL: "", + pageStatusCode: 0, + }, + markdown: "", + content: "", + html: "", + rawHtml: "", + llm_extraction: {}, + }, + }, + }); + } finally { + setLoading(false); + } + }; + + const handleScrapeSelected = async () => { + setLoading(true); + setElapsedTime(0); + setCrawlStatus({ current: 0, total: selectedUrls.length }); + setIsScraping(true); + setScrapingSelectedLoading(true); + const newScrapeResults: Record = {}; + + for (const [index, url] of selectedUrls.entries()) { + try { + const response = await fetch(`${FIRECRAWL_API_URL}/v1/scrape`, { + method: "POST", + headers: { + Authorization: `Bearer ${FIRECRAWL_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + url: url, + formats: ["markdown"], + }), + }); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + const data: ScrapeResult = await response.json(); + newScrapeResults[url] = data; + setCrawlStatus((prev) => ({ ...prev, current: index + 1 })); + setScrapeResults({ ...scrapeResults, ...newScrapeResults }); + } catch (error) { + console.error(`Error scraping ${url}:`, error); + newScrapeResults[url] = { + success: false, + data: { + markdown: "", + content: "", + html: "", + rawHtml: "", + metadata: { + title: "", + description: "", + language: "", + sourceURL: url, + pageStatusCode: 0, + pageError: (error as Error).message, + }, + llm_extraction: {}, + }, + }; + } + } + + setLoading(false); + setIsScraping(false); + }; + + const handlePageChange = (newPage: number) => { + setCurrentPage(newPage); + }; + + const paginatedUrls = crawledUrls.slice( + (currentPage - 1) * urlsPerPage, + currentPage * urlsPerPage + ); + + return ( +
+ + + + Extract web content (V1) + + Powered by Firecrawl 🔥 + + +
+ Use this component to quickly give your users the ability to connect + their AI apps to web data with Firecrawl. Learn more on the{" "} + + Firecrawl docs! + +
+
+ +
+
+ + +
+ + + + + +
+ + setFormData((prev) => ({ + ...prev, + crawlSubPages: checked, + })) + } + /> + +
+ +
+
+ + +
+
+ + +
+
+
+
+
+ {showCrawlStatus && ( +
+
+ {!isScraping && + crawledUrls.length > 0 && + !scrapingSelectedLoading && ( + <> + { + if (checked) { + setSelectedUrls([...crawledUrls]); + } else { + setSelectedUrls([]); + } + }} + /> + + + )} +
+
+ {isScraping + ? `Scraped ${crawlStatus.current} page(s) in ${elapsedTime}s` + : `Crawled ${crawlStatus.current} pages in ${elapsedTime}s`} +
+
+ )} + + {crawledUrls.length > 0 && + !scrapingSelectedLoading && + !isScraping && ( + <> +
    + {paginatedUrls.map((url, index) => ( +
  • + + setSelectedUrls((prev) => + prev.includes(url) + ? prev.filter((u) => u !== url) + : [...prev, url] + ) + } + /> + + {url.length > 70 ? `${url.slice(0, 70)}...` : url} + +
  • + ))} +
+
+ + + Page {currentPage} of{" "} + {Math.ceil(crawledUrls.length / urlsPerPage)} + + +
+ + )} +
+ + {crawledUrls.length > 0 && !scrapingSelectedLoading && ( + + )} + +
+ + {Object.keys(scrapeResults).length > 0 && ( +
+

Scrape Results

+

+ You can do whatever you want with the scrape results. Here is a + basic showcase of the markdown. +

+
+ {Object.entries(scrapeResults).map(([url, result]) => ( + + + {result.data.metadata.title} + + {url + .replace(/^(https?:\/\/)?(www\.)?/, "") + .replace(/\/$/, "")} + + + +
+ {result.success ? ( + <> +
+                          {result.data.markdown.trim()}
+                        
+ + ) : ( + <> +

+ Failed to scrape this URL +

+

+ {result.toString()} +

+ + )} +
+
+
+ ))} +
+
+ )} +
+ ); +} diff --git a/apps/ui/ingestion-ui/src/components/ui/radio-group.tsx b/apps/ui/ingestion-ui/src/components/ui/radio-group.tsx new file mode 100644 index 00000000..43b43b48 --- /dev/null +++ b/apps/ui/ingestion-ui/src/components/ui/radio-group.tsx @@ -0,0 +1,42 @@ +import * as React from "react" +import * as RadioGroupPrimitive from "@radix-ui/react-radio-group" +import { Circle } from "lucide-react" + +import { cn } from "@/lib/utils" + +const RadioGroup = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => { + return ( + + ) +}) +RadioGroup.displayName = RadioGroupPrimitive.Root.displayName + +const RadioGroupItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => { + return ( + + + + + + ) +}) +RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName + +export { RadioGroup, RadioGroupItem }