Merge branch 'main' into v1/llm-extract

This commit is contained in:
Nicolas 2024-08-30 12:34:50 -03:00
commit 552328d168
9 changed files with 839 additions and 21 deletions

View File

@ -1,3 +1,38 @@
<h3 align="center">
<img
src="https://raw.githubusercontent.com/mendableai/firecrawl/main/img/firecrawl_logo.png"
height="200"
>
</h3>
<div align="center">
<a href="https://github.com/mendableai/firecrawl/blob/main/LICENSE">
<img src="https://img.shields.io/github/license/mendableai/firecrawl" alt="License">
</a>
<a href="https://pepy.tech/project/firecrawl-py">
<img src="https://static.pepy.tech/badge/firecrawl-py" alt="Downloads">
</a>
<a href="https://GitHub.com/mendableai/firecrawl/graphs/contributors">
<img src="https://img.shields.io/github/contributors/mendableai/firecrawl.svg" alt="GitHub Contributors">
</a>
<a href="https://github.com/mendableai/firecrawl">
<img src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" alt="Open Source">
</a>
</div>
<div>
<p align="center">
<a href="https://twitter.com/firecrawl_dev">
<img src="https://img.shields.io/badge/Follow%20on%20X-000000?style=for-the-badge&logo=x&logoColor=white" alt="Follow on X" />
</a>
<a href="https://www.linkedin.com/company/104100957">
<img src="https://img.shields.io/badge/Follow%20on%20LinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white" alt="Follow on LinkedIn" />
</a>
<a href="https://discord.com/invite/gSmWdAkdwd">
<img src="https://img.shields.io/badge/Join%20our%20Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Join our Discord" />
</a>
</p>
</div>
# 🔥 Firecrawl
Crawl and convert any website into LLM-ready markdown or structured data. Built by [Mendable.ai](https://mendable.ai?ref=gfirecrawl) and the Firecrawl community. Includes powerful scraping, crawling and data extraction capabilities.
@ -10,7 +45,9 @@ _This repository is in its early development stages. We are still merging custom
_Psst. Hey, you, join our stargazers :)_
<img src="https://github.com/mendableai/firecrawl/assets/44934913/53c4483a-0f0e-40c6-bd84-153a07f94d29" width="200">
<a href="https://github.com/mendableai/firecrawl">
<img src="https://img.shields.io/github/stars/mendableai/firecrawl.svg?style=social&label=Star&maxAge=2592000" alt="GitHub stars">
</a>
## How to use it?

View File

@ -190,12 +190,17 @@ if (cluster.isMaster) {
res.send({ isProduction: global.isProduction });
});
Sentry.setupExpressErrorHandler(app);
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
// Error middleware for request-validation failures: a ZodError becomes a
// 400 "Bad Request" response carrying the validation issues; every other
// error is forwarded unchanged to the next error handler.
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response<ErrorResponse>, next: NextFunction) => {
  if (!(err instanceof ZodError)) {
    next(err);
    return;
  }
  res.status(400).json({ success: false, error: "Bad Request", details: err.errors });
});
Sentry.setupExpressErrorHandler(app);
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
const id = res.sentry ?? uuidv4();
let verbose = JSON.stringify(err);
if (verbose === "{}") {
@ -210,7 +215,6 @@ if (cluster.isMaster) {
Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
res.status(500).json({ success: false, error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id });
}
});
Logger.info(`Worker ${process.pid} started`);

View File

@ -20,7 +20,7 @@ This template provides an easy way to spin up a UI for Firecrawl using React. It
```
2. Set up your Firecrawl API key:
Open `src/components/FirecrawlComponent.tsx` and replace the placeholder API key:
Open `src/components/ingestion.tsx` and replace the placeholder API key:
```typescript
const FIRECRAWL_API_KEY = "your-api-key-here";
@ -36,7 +36,7 @@ This template provides an easy way to spin up a UI for Firecrawl using React. It
## Customization
The main Firecrawl component is located in `src/components/FirecrawlComponent.tsx`. You can modify this file to customize the UI or add additional features.
The main Firecrawl component is located in `src/components/ingestion.tsx`. You can modify this file to customize the UI or add additional features.
## Security Considerations

View File

@ -11,6 +11,7 @@
"@radix-ui/react-checkbox": "^1.1.1",
"@radix-ui/react-collapsible": "^1.1.0",
"@radix-ui/react-label": "^2.1.0",
"@radix-ui/react-radio-group": "^1.2.0",
"@radix-ui/react-slot": "^1.1.0",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
@ -1192,6 +1193,32 @@
}
}
},
"node_modules/@radix-ui/react-collection": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.0.tgz",
"integrity": "sha512-GZsZslMJEyo1VKm5L1ZJY8tGDxZNPAoUeQUIbKeJfoi7Q4kmig5AsgLMYYuyYbfjd8fBmFORAIwYAkXMnXZgZw==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.0",
"@radix-ui/react-context": "1.1.0",
"@radix-ui/react-primitive": "2.0.0",
"@radix-ui/react-slot": "1.1.0"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-compose-refs": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.0.tgz",
@ -1220,6 +1247,21 @@
}
}
},
"node_modules/@radix-ui/react-direction": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.0.tgz",
"integrity": "sha512-BUuBvgThEiAXh2DWu93XsT+a3aWrGqolGlqqw5VU1kG7p/ZH2cuDlM1sRLNnY3QcBS69UIz2mcKhMxDsdewhjg==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-id": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.0.tgz",
@ -1304,6 +1346,69 @@
}
}
},
"node_modules/@radix-ui/react-radio-group": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.2.0.tgz",
"integrity": "sha512-yv+oiLaicYMBpqgfpSPw6q+RyXlLdIpQWDHZbUKURxe+nEh53hFXPPlfhfQQtYkS5MMK/5IWIa76SksleQZSzw==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.0",
"@radix-ui/react-compose-refs": "1.1.0",
"@radix-ui/react-context": "1.1.0",
"@radix-ui/react-direction": "1.1.0",
"@radix-ui/react-presence": "1.1.0",
"@radix-ui/react-primitive": "2.0.0",
"@radix-ui/react-roving-focus": "1.1.0",
"@radix-ui/react-use-controllable-state": "1.1.0",
"@radix-ui/react-use-previous": "1.1.0",
"@radix-ui/react-use-size": "1.1.0"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-roving-focus": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz",
"integrity": "sha512-EA6AMGeq9AEeQDeSH0aZgG198qkfHSbvWTf1HvoDmOB5bBG/qTxjYMWUKMnYiV6J/iP/J8MEFSuB2zRU2n7ODA==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.0",
"@radix-ui/react-collection": "1.1.0",
"@radix-ui/react-compose-refs": "1.1.0",
"@radix-ui/react-context": "1.1.0",
"@radix-ui/react-direction": "1.1.0",
"@radix-ui/react-id": "1.1.0",
"@radix-ui/react-primitive": "2.0.0",
"@radix-ui/react-use-callback-ref": "1.1.0",
"@radix-ui/react-use-controllable-state": "1.1.0"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-slot": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.0.tgz",

View File

@ -13,6 +13,7 @@
"@radix-ui/react-checkbox": "^1.1.1",
"@radix-ui/react-collapsible": "^1.1.0",
"@radix-ui/react-label": "^2.1.0",
"@radix-ui/react-radio-group": "^1.2.0",
"@radix-ui/react-slot": "^1.1.0",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",

View File

@ -1,9 +1,35 @@
import { useState } from "react";
import FirecrawlComponent from "./components/ingestion";
import FirecrawlComponentV1 from "./components/ingestionV1";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import { Label } from "@/components/ui/label";
/**
 * Root view of the template: a two-option radio group that switches between
 * the V0 and V1 Firecrawl demo components. V1 is shown initially.
 */
function App() {
  const [selectedComponent, setSelectedComponent] = useState<"v0" | "v1">("v1");

  // The radio group reports its value as a plain string; the two items
  // rendered below only ever emit "v0" or "v1".
  const handleVersionChange = (value: string) => {
    setSelectedComponent(value as "v0" | "v1");
  };

  const versionOptions = [
    { value: "v0", label: "Firecrawl Component V0" },
    { value: "v1", label: "Firecrawl Component V1" },
  ];

  const ActiveComponent =
    selectedComponent === "v1" ? FirecrawlComponentV1 : FirecrawlComponent;

  return (
    <>
      <div className="flex justify-center items-center space-x-2 p-4">
        <RadioGroup
          defaultValue="v1"
          onValueChange={handleVersionChange}
          className="flex space-x-6 mt-6"
        >
          {versionOptions.map((option) => (
            <div key={option.value} className="flex items-center space-x-2 p-2">
              <RadioGroupItem value={option.value} id={option.value} />
              <Label htmlFor={option.value}>{option.label}</Label>
            </div>
          ))}
        </RadioGroup>
      </div>
      <ActiveComponent />
    </>
  );
}

View File

@ -0,0 +1,603 @@
import { useState, ChangeEvent, FormEvent, useEffect } from "react";
import {
Card,
CardHeader,
CardTitle,
CardContent,
CardFooter,
} from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Label } from "@/components/ui/label";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { ChevronDown, ChevronLeft, ChevronRight } from "lucide-react";
//! Hardcoded values (not recommended for production).
//! Both values are embedded in this client-side component, so it is highly recommended
//! to move all Firecrawl API calls to a backend instead (e.g. a Next.js API route).
const FIRECRAWL_API_URL = "https://api.firecrawl.dev"; // Base URL of the Firecrawl API: point it at a local instance or at Firecrawl Cloud
const FIRECRAWL_API_KEY = "fc-YOUR_API_KEY"; // Placeholder: replace with your actual API key (sent as the Bearer token on each request)
/**
 * Values held in the component's form state.
 *
 * `maxDepth`, `excludePaths`, `includePaths` and `extractMainContent` are
 * initialised in state, but no input or request field in the visible
 * component reads them.
 */
interface FormData {
url: string;
// When true the form posts to /v1/map, otherwise to /v1/scrape.
crawlSubPages: boolean;
search: string;
// Kept as text; converted to an integer when the map request is built.
limit: string;
maxDepth: string;
excludePaths: string;
includePaths: string;
extractMainContent: boolean;
}
/**
 * Crawler settings nested under `RequestBody.crawlerOptions`.
 * NOTE(review): the request builders visible in this file never set it;
 * presumably a shape carried over from the V0 component — confirm before relying on it.
 */
interface CrawlerOptions {
includes?: string[];
excludes?: string[];
maxDepth?: number;
limit?: number;
returnOnlyUrls: boolean;
}
/** Scrape settings nested under `RequestBody.scrapeOptions`; not set by the visible request builders. */
interface ScrapeOptions {
formats?: string[];
onlyMainContent?: boolean;
}
/** Page settings nested under `RequestBody.pageOptions`; not set by the visible request builders. */
interface PageOptions {
onlyMainContent: boolean;
}
/**
 * JSON body posted to the Firecrawl API on form submit.
 * The visible code fills `url`, `search` and `limit` for /v1/map and
 * `url` plus `formats` for /v1/scrape; the remaining fields stay unset.
 */
interface RequestBody {
url: string;
crawlerOptions?: CrawlerOptions;
pageOptions?: PageOptions;
search?: string;
excludePaths?: string[];
includePaths?: string[];
maxDepth?: number;
limit?: number;
scrapeOptions?: ScrapeOptions;
formats?: string[];
}
/** Metadata attached to a scraped page. */
interface ScrapeResultMetadata {
title: string;
description: string;
language: string;
sourceURL: string;
pageStatusCode: number;
// Filled with an error message by this component when a request fails.
pageError?: string;
// Allows additional string/number metadata keys beyond the named ones.
[key: string]: string | number | undefined;
}
/** Content payload of a scrape result; the results list renders `markdown`. */
interface ScrapeResultData {
markdown: string;
content: string;
html: string;
rawHtml: string;
metadata: ScrapeResultMetadata;
llm_extraction: Record<string, unknown>;
warning?: string;
}
/**
 * One entry of the `scrapeResults` state. It is used both as the parsed
 * /v1/scrape response and as the synthetic failure record built by this
 * component (`success: false`).
 */
interface ScrapeResult {
success: boolean;
data: ScrapeResultData;
}
export default function FirecrawlComponentV1() {
const [formData, setFormData] = useState<FormData>({
url: "",
crawlSubPages: false,
search: "",
limit: "",
maxDepth: "",
excludePaths: "",
includePaths: "",
extractMainContent: false,
});
const [loading, setLoading] = useState<boolean>(false);
const [scrapingSelectedLoading, setScrapingSelectedLoading] =
useState<boolean>(false);
const [crawledUrls, setCrawledUrls] = useState<string[]>([]);
const [selectedUrls, setSelectedUrls] = useState<string[]>([]);
const [scrapeResults, setScrapeResults] = useState<
Record<string, ScrapeResult>
>({});
const [isCollapsibleOpen, setIsCollapsibleOpen] = useState(true);
const [crawlStatus, setCrawlStatus] = useState<{
current: number;
total: number | null;
}>({ current: 0, total: null });
const [elapsedTime, setElapsedTime] = useState<number>(0);
const [showCrawlStatus, setShowCrawlStatus] = useState<boolean>(false);
const [isScraping, setIsScraping] = useState<boolean>(false);
const [currentPage, setCurrentPage] = useState<number>(1);
const urlsPerPage = 10;
// Elapsed-time ticker: while `loading` is true, reveal the status bar and
// add one to `elapsedTime` every second. The cleanup stops the interval when
// `loading` changes or the component unmounts.
useEffect(() => {
  if (!loading) {
    return undefined;
  }
  setShowCrawlStatus(true);
  // The handle type is inferred from setInterval instead of being declared as
  // `NodeJS.Timeout`, which is a Node typing and does not belong in browser code.
  const timer = setInterval(() => {
    setElapsedTime((prevTime) => prevTime + 1);
  }, 1000);
  return () => clearInterval(timer);
}, [loading]);
/**
 * Shared change handler for the form inputs. Stores the input's value (or its
 * checked flag for checkboxes) under the input's `name`, and re-derives
 * "Crawl Sub-pages" whenever the limit or search field is edited.
 */
const handleChange = (e: ChangeEvent<HTMLInputElement>) => {
  // Read everything off the event target before scheduling the state update.
  const { name, value, type, checked } = e.target;
  const storedValue = type === "checkbox" ? checked : value;
  const affectsCrawlToggle = name === "limit" || name === "search";

  setFormData((prevData) => {
    const newData = {
      ...prevData,
      [name]: storedValue,
    };
    if (affectsCrawlToggle) {
      // Automatically check "Crawl Sub-pages" if limit or search have content
      newData.crawlSubPages =
        Boolean(value) || Boolean(newData.limit) || Boolean(newData.search);
    }
    return newData;
  });
};
/**
 * Form submit handler: clears all result and progress state, then sends one
 * request to the Firecrawl API.
 *
 * - "Crawl Sub-pages" checked: POST /v1/map with the URL plus the optional
 *   `search` and `limit`; the returned `links` fill the selectable URL list.
 * - Otherwise: POST /v1/scrape for the single URL requesting markdown; the
 *   response is stored as the only scrape result.
 *
 * A non-2xx response or a thrown fetch error is surfaced as one failed
 * result stored under the key "error".
 */
const handleSubmit = async (e: FormEvent<HTMLFormElement>) => {
  e.preventDefault();
  setLoading(true);
  setIsCollapsibleOpen(false);
  setElapsedTime(0);
  setCrawlStatus({ current: 0, total: null });
  setIsScraping(!formData.crawlSubPages);
  setCrawledUrls([]);
  setSelectedUrls([]);
  setScrapeResults({});
  setScrapingSelectedLoading(false);
  setShowCrawlStatus(false);
  // Return to the first page so a new, shorter URL list cannot open on a page
  // index left over from the previous crawl.
  setCurrentPage(1);

  try {
    const endpoint = `${FIRECRAWL_API_URL}/v1/${
      formData.crawlSubPages ? "map" : "scrape"
    }`;

    // An empty or non-numeric limit parses to NaN, which JSON would serialise
    // as null; leave the field out of the request in that case.
    const parsedLimit = Number.parseInt(formData.limit, 10);
    const limit = Number.isNaN(parsedLimit) ? undefined : parsedLimit;

    const requestBody: RequestBody = formData.crawlSubPages
      ? {
          url: formData.url,
          search: formData.search || undefined,
          limit,
        }
      : {
          url: formData.url,
          formats: ["markdown"],
        };

    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data = await response.json();

    if (formData.crawlSubPages) {
      if (data.success === true && Array.isArray(data.links)) {
        // Every mapped link starts out selected.
        setCrawledUrls(data.links);
        setSelectedUrls(data.links);
        setCrawlStatus({
          current: data.links.length,
          total: data.links.length,
        });
      } else {
        console.error("Unexpected response format from map endpoint");
        console.log(data);
      }
    } else {
      setScrapeResults({ [formData.url]: data });
      setCrawlStatus({ current: 1, total: 1 });
    }
  } catch (error) {
    console.error("Error:", error);
    setScrapeResults({
      error: {
        success: false,
        data: {
          metadata: {
            pageError: "Error occurred while fetching data",
            title: "",
            description: "",
            language: "",
            sourceURL: "",
            pageStatusCode: 0,
          },
          markdown: "",
          content: "",
          html: "",
          rawHtml: "",
          llm_extraction: {},
        },
      },
    });
  } finally {
    setLoading(false);
  }
};
/**
 * Scrapes every currently selected URL, one request at a time, via
 * POST /v1/scrape (markdown format) and publishes each outcome to
 * `scrapeResults` as soon as it is known.
 *
 * A failed request is recorded as a `success: false` entry whose
 * `metadata.pageError` carries the error message; it does not stop the loop.
 * The progress counter advances only on successful scrapes.
 */
const handleScrapeSelected = async () => {
  setLoading(true);
  setElapsedTime(0);
  setCrawlStatus({ current: 0, total: selectedUrls.length });
  setIsScraping(true);
  setScrapingSelectedLoading(true);

  // Merge through the updater form so each outcome is added to the latest
  // state instead of the snapshot captured when the button was clicked.
  const publishResult = (url: string, result: ScrapeResult) => {
    setScrapeResults((prev) => ({ ...prev, [url]: result }));
  };

  for (const [index, url] of selectedUrls.entries()) {
    try {
      const response = await fetch(`${FIRECRAWL_API_URL}/v1/scrape`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          url: url,
          formats: ["markdown"],
        }),
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const data: ScrapeResult = await response.json();
      publishResult(url, data);
      setCrawlStatus((prev) => ({ ...prev, current: index + 1 }));
    } catch (error) {
      console.error(`Error scraping ${url}:`, error);
      // Publish the failure immediately; otherwise it would only become
      // visible if a later URL happened to succeed.
      publishResult(url, {
        success: false,
        data: {
          markdown: "",
          content: "",
          html: "",
          rawHtml: "",
          metadata: {
            title: "",
            description: "",
            language: "",
            sourceURL: url,
            pageStatusCode: 0,
            pageError: error instanceof Error ? error.message : String(error),
          },
          llm_extraction: {},
        },
      });
    }
  }

  setLoading(false);
  setIsScraping(false);
};
// Moves the URL list to `newPage`; page numbers are 1-based, matching the
// `(currentPage - 1) * urlsPerPage` offset used to slice the list.
const handlePageChange = (newPage: number) => setCurrentPage(newPage);
const paginatedUrls = crawledUrls.slice(
(currentPage - 1) * urlsPerPage,
currentPage * urlsPerPage
);
return (
<div className="max-w-2xl mx-auto p-4">
<Card>
<CardHeader className="flex items-start justify-between mb-0 pb-4">
<CardTitle className="flex items-center justify-between w-full space-x-2">
<span className="text-base">Extract web content (V1)</span>
<a
href="https://www.firecrawl.dev"
className="text-xs text-gray-500 font-normal px-3 py-1 bg-zinc-100 rounded-xl hover:bg-zinc-200 transition-colors"
>
Powered by Firecrawl 🔥
</a>
</CardTitle>
<div className="text-sm text-gray-500 w-11/12 items-center">
Use this component to quickly give your users the ability to connect
their AI apps to web data with Firecrawl. Learn more on the{" "}
<a
href="https://docs.firecrawl.dev/"
className="text-sm text-blue-500"
>
Firecrawl docs!
</a>
</div>
</CardHeader>
<CardContent className="space-y-4">
<form onSubmit={handleSubmit}>
<div className="flex items-center space-x-2">
<Input
placeholder="https://www.firecrawl.dev/"
className="flex-grow"
name="url"
value={formData.url}
onChange={handleChange}
/>
<Button type="submit" variant="default" disabled={loading}>
{loading ? (
<div
role="status"
className="flex items-center justify-between space-x-2"
>
<svg
className="animate-spin h-4 w-4 text-white"
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
>
<circle
className="opacity-25"
cx="12"
cy="12"
r="10"
stroke="currentColor"
strokeWidth="4"
></circle>
<path
className="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
></path>
</svg>
<span className="sr-only">Loading...</span>
</div>
) : (
"Run"
)}
</Button>
</div>
<Collapsible
open={isCollapsibleOpen}
onOpenChange={setIsCollapsibleOpen}
className="mt-2"
>
<CollapsibleTrigger asChild>
<Button variant="ghost" className="w-full justify-between pl-2">
Advanced Options
<ChevronDown className="h-4 w-4 opacity-50" />
</Button>
</CollapsibleTrigger>
<CollapsibleContent className="space-y-4 mt-4 px-2">
<div className="flex items-center space-x-2">
<Checkbox
id="crawlSubPages"
name="crawlSubPages"
checked={formData.crawlSubPages}
onCheckedChange={(checked: boolean) =>
setFormData((prev) => ({
...prev,
crawlSubPages: checked,
}))
}
/>
<label htmlFor="crawlSubPages" className="text-sm">
Crawl Sub-pages
</label>
</div>
<div className="grid grid-cols-2 gap-4">
<div>
<Label
htmlFor="search"
className="block text-left w-full pb-2"
>
Search for specific pages in crawl *
</Label>
<Input
id="search"
name="search"
placeholder="python sdk"
value={formData.search}
onChange={handleChange}
/>
</div>
<div>
<Label
htmlFor="limit"
className="block text-left w-full pb-2"
>
Limit *
</Label>
<Input
id="limit"
name="limit"
placeholder="10"
value={formData.limit}
onChange={handleChange}
/>
</div>
</div>
</CollapsibleContent>
</Collapsible>
</form>
{showCrawlStatus && (
<div className="flex items-center justify-between mb-2 space-x-2 bg-gray-100 p-2 rounded-md">
<div className="flex items-center space-x-2">
{!isScraping &&
crawledUrls.length > 0 &&
!scrapingSelectedLoading && (
<>
<Checkbox
id="selectAll"
checked={selectedUrls.length === crawledUrls.length}
onCheckedChange={(checked) => {
if (checked) {
setSelectedUrls([...crawledUrls]);
} else {
setSelectedUrls([]);
}
}}
/>
<label
htmlFor="selectAll"
className="text-sm cursor-pointer"
>
{selectedUrls.length === crawledUrls.length
? `Unselect All (${selectedUrls.length})`
: `Select All (${selectedUrls.length})`}
</label>
</>
)}
</div>
<div className="text-sm text-gray-600">
{isScraping
? `Scraped ${crawlStatus.current} page(s) in ${elapsedTime}s`
: `Crawled ${crawlStatus.current} pages in ${elapsedTime}s`}
</div>
</div>
)}
{crawledUrls.length > 0 &&
!scrapingSelectedLoading &&
!isScraping && (
<>
<ul className="pl-2">
{paginatedUrls.map((url, index) => (
<li
key={index}
className="flex items-center space-x-2 my-2 text-sm"
>
<Checkbox
checked={selectedUrls.includes(url)}
onCheckedChange={() =>
setSelectedUrls((prev) =>
prev.includes(url)
? prev.filter((u) => u !== url)
: [...prev, url]
)
}
/>
<span className="flex items-center max-w-lg">
{url.length > 70 ? `${url.slice(0, 70)}...` : url}
</span>
</li>
))}
</ul>
<div className="flex items-center justify-between mt-4">
<Button
variant="outline"
className="px-2"
onClick={() => handlePageChange(currentPage - 1)}
disabled={currentPage === 1}
>
<ChevronLeft className="h-5 w-5" />
</Button>
<span className="text-sm text-gray-500">
Page {currentPage} of{" "}
{Math.ceil(crawledUrls.length / urlsPerPage)}
</span>
<Button
variant="outline"
className="px-2"
onClick={() => handlePageChange(currentPage + 1)}
disabled={currentPage * urlsPerPage >= crawledUrls.length}
>
<ChevronRight className="h-5 w-5 " />
</Button>
</div>
</>
)}
</CardContent>
<CardFooter className="w-full flex justify-center">
{crawledUrls.length > 0 && !scrapingSelectedLoading && (
<Button
variant="default"
className="w-full"
onClick={handleScrapeSelected}
disabled={loading || selectedUrls.length === 0}
>
Scrape Selected URLs
</Button>
)}
</CardFooter>
</Card>
{Object.keys(scrapeResults).length > 0 && (
<div className="mt-4">
<h2 className="text-base font-bold ">Scrape Results</h2>
<p className="text-sm text-gray-500">
You can do whatever you want with the scrape results. Here is a
basic showcase of the markdown.
</p>
<div className="flex flex-col gap-4 mt-4 w-full">
{/* One card per result. Failed entries show the recorded page error;
    calling toString() on the result object would only print "[object Object]". */}
{Object.entries(scrapeResults).map(([url, result]) => (
  <Card key={url} className="relative p-4 w-full">
    <CardTitle className="text-sm font-normal flex flex-col">
      <span>{result.data.metadata.title}</span>
      <span className="text-xs text-gray-500">
        {url
          .replace(/^(https?:\/\/)?(www\.)?/, "")
          .replace(/\/$/, "")}
      </span>
    </CardTitle>
    <CardContent className="relative px-0 pt-2 !text-xs w-full">
      <div className=" overflow-y-auto h-32 bg-zinc-100 rounded-md p-2 w-full">
        {result.success ? (
          <>
            <pre className="text-xs whitespace-pre-wrap">
              {result.data.markdown.trim()}
            </pre>
          </>
        ) : (
          <>
            <p className="text-red-500">
              Failed to scrape this URL
            </p>
            <p className="text-zinc-500 font-mono">
              {result.data.metadata.pageError ??
                "No error details were reported."}
            </p>
          </>
        )}
      </div>
    </CardContent>
  </Card>
))}
</div>
</div>
)}
</div>
);
}

View File

@ -0,0 +1,42 @@
import * as React from "react"
import * as RadioGroupPrimitive from "@radix-ui/react-radio-group"
import { Circle } from "lucide-react"
import { cn } from "@/lib/utils"
/**
 * Radio group root: forwards its ref and all props to the Radix root and
 * prepends a default "grid gap-2" layout to any caller-supplied class names.
 */
const RadioGroup = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>
>(({ className, ...rest }, forwardedRef) => (
  <RadioGroupPrimitive.Root
    className={cn("grid gap-2", className)}
    {...rest}
    ref={forwardedRef}
  />
));
RadioGroup.displayName = RadioGroupPrimitive.Root.displayName
// Base classes applied to every radio item before caller-supplied ones.
const radioGroupItemClasses =
  "aspect-square h-4 w-4 rounded-full border border-primary text-primary ring-offset-background focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"

/**
 * Single radio option: forwards its ref and props to the Radix item and
 * renders a filled circle inside the Radix indicator.
 */
const RadioGroupItem = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>
>(({ className, ...rest }, forwardedRef) => (
  <RadioGroupPrimitive.Item
    ref={forwardedRef}
    className={cn(radioGroupItemClasses, className)}
    {...rest}
  >
    <RadioGroupPrimitive.Indicator className="flex items-center justify-center">
      <Circle className="h-2.5 w-2.5 fill-current text-current" />
    </RadioGroupPrimitive.Indicator>
  </RadioGroupPrimitive.Item>
));
RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName
export { RadioGroup, RadioGroupItem }

BIN
img/firecrawl_logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB