firecrawl/apps/rust-sdk/examples/example.rs
2024-09-20 20:10:11 +02:00

87 lines
2.8 KiB
Rust

use firecrawl::{crawl::CrawlOptions, scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions}, FirecrawlApp};
use serde_json::json;
#[tokio::main]
async fn main() {
// Initialize the FirecrawlApp with the API key
let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");
// Or, connect to a self-hosted instance:
// let app = FirecrawlApp::new_selfhosted("http://localhost:3002", None).expect("Failed to initialize FirecrawlApp");
// Scrape a website
let scrape_result = app.scrape_url("https://firecrawl.dev", None).await;
match scrape_result {
Ok(data) => println!("Scrape Result:\n{}", data.markdown.unwrap()),
Err(e) => eprintln!("Scrape failed: {:#?}", e),
}
// Crawl a website
let crawl_options = CrawlOptions {
exclude_paths: vec![ "blog/*".into() ].into(),
..Default::default()
};
let crawl_result = app
.crawl_url("https://mendable.ai", crawl_options)
.await;
match crawl_result {
Ok(data) => println!("Crawl Result (used {} credits):\n{:#?}", data.credits_used, data.data),
Err(e) => eprintln!("Crawl failed: {}", e),
}
// Scrape with Extract
let json_schema = json!({
"type": "object",
"properties": {
"top": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"points": {"type": "number"},
"by": {"type": "string"},
"commentsURL": {"type": "string"}
},
"required": ["title", "points", "by", "commentsURL"]
},
"minItems": 5,
"maxItems": 5,
"description": "Top 5 stories on Hacker News"
}
},
"required": ["top"]
});
let llm_extraction_options = ScrapeOptions {
formats: vec![ ScrapeFormats::Extract ].into(),
extract: ExtractOptions {
schema: json_schema.into(),
..Default::default()
}.into(),
..Default::default()
};
let llm_extraction_result = app
.scrape_url("https://news.ycombinator.com", llm_extraction_options)
.await;
match llm_extraction_result {
Ok(data) => println!("LLM Extraction Result:\n{:#?}", data.extract.unwrap()),
Err(e) => eprintln!("LLM Extraction failed: {}", e),
}
// Map a website (Alpha)
let map_result = app
.map_url("https://firecrawl.dev", None)
.await;
match map_result {
Ok(data) => println!("Mapped URLs: {:#?}", data),
Err(e) => eprintln!("Map failed: {}", e),
}
}