Swarm Extractor Example

This commit is contained in:
Eric Ciarla 2024-10-17 15:47:28 -04:00
parent 081d7407b3
commit 5f69358ce8
5 changed files with 164 additions and 0 deletions

1
.gitignore vendored
View File

@ -28,3 +28,4 @@ apps/js-sdk/firecrawl/dist
/examples/o1_web_crawler/firecrawl_env
/examples/crm_lead_enrichment/crm_lead_enrichment_env
/.venv

View File

@ -0,0 +1,3 @@
OPENAI_API_KEY=
FIRECRAWL_API_KEY=
SERP_API_KEY=

View File

@ -0,0 +1,37 @@
# Swarm Firecrawl Web Extractor
A multi-agent system using [OpenAI Swarm](https://github.com/openai/swarm) for AI-powered web data extraction, using Google search to find a website and [Firecrawl](https://firecrawl.dev) for mapping and scraping it.
## Agents
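1. User Interface: Manages user interactions and collects the web data extraction objective
2. Google Search: Searches Google via SerpAPI to find the target website
3. Map URL: Maps the website's pages via Firecrawl API and picks the most relevant page
4. Website Scraper: Extracts clean LLM-ready content via Firecrawl API
5. Analyst: Examines the scraped content and returns a JSON object based on the objective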
## Requirements
- [Firecrawl](https://firecrawl.dev) API key
- [OpenAI](https://platform.openai.com/api-keys) API key
- [SerpAPI](https://serpapi.com) API key
## Setup
1. Install the required packages:
```
pip install -r requirements.txt
```
2. Set up your environment variables in a `.env` file:
```
OPENAI_API_KEY=your_openai_api_key
FIRECRAWL_API_KEY=your_firecrawl_api_key
SERP_API_KEY=your_serp_api_key
```
## Usage
Run the main script to start the interactive demo:
```
python main.py
```

View File

@ -0,0 +1,120 @@
import os
from firecrawl import FirecrawlApp
from swarm import Agent
from swarm.repl import run_demo_loop
import dotenv
from serpapi import GoogleSearch
from openai import OpenAI
# Load variables from a local .env file into the process environment before
# any client reads its API key.
dotenv.load_dotenv()

# Module-level service clients shared by the tool functions in this file.
# A missing environment variable yields None here rather than raising.
app = FirecrawlApp(api_key=os.environ.get("FIRECRAWL_API_KEY"))
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def search_google(query, objective):
    """Search Google using SerpAPI."""
    # The docstring above is kept verbatim: Swarm uses a tool function's
    # docstring as the description shown to the model.
    #
    # Returns a dict with the unchanged `objective` and, under "results",
    # the "organic_results" list of the SerpAPI response (empty if absent).
    print(f"Parameters: query={query}, objective={objective}")
    request_params = {"q": query, "api_key": os.getenv("SERP_API_KEY")}
    response = GoogleSearch(request_params).get_dict()
    organic_hits = response.get("organic_results", [])
    return {"objective": objective, "results": organic_hits}
def map_url_pages(url, objective):
    """Map a website's pages using Firecrawl."""
    # The docstring above is kept verbatim: Swarm uses a tool function's
    # docstring as the description shown to the model.
    #
    # Flow: ask the LLM for a short search query derived from the objective,
    # pass it to Firecrawl's map call as the `search` parameter, and return
    # only the first link of the response.
    #
    # Returns a dict with the unchanged `objective` and, under "results",
    # a list holding the top link, or an empty list when the map call did not
    # succeed or produced no links.
    search_query = generate_completion(
        "website search query generator",
        f"Generate a 1-2 word search query for the website: {url} based on the objective",
        "Objective: " + objective,
    )
    print(f"Parameters: url={url}, objective={objective}, search_query={search_query}")
    map_status = app.map_url(url, params={'search': search_query})

    # Firecrawl's map response reports success through a boolean "success"
    # field. The previous check looked only at status == 'success', which
    # such a response never satisfies, so every call returned no results.
    # The old form is still accepted for compatibility.
    succeeded = bool(map_status.get('success')) or map_status.get('status') == 'success'
    links = map_status.get('links', []) if succeeded else []
    top_link = links[0] if links else None
    return {"objective": objective, "results": [top_link] if top_link else []}
def scrape_url(url, objective):
    """Scrape a website using Firecrawl."""
    # The docstring above is kept verbatim: Swarm uses a tool function's
    # docstring as the description shown to the model.
    #
    # Requests the page in markdown format only and returns a dict with the
    # unchanged `objective` and Firecrawl's scrape response under "results".
    print(f"Parameters: url={url}, objective={objective}")
    scrape_options = {'formats': ['markdown']}
    page = app.scrape_url(url, params=scrape_options)
    return {"objective": objective, "results": page}
def analyze_website_content(content, objective):
    """Analyze the scraped website content using OpenAI."""
    # The docstring above is kept verbatim: Swarm uses a tool function's
    # docstring as the description shown to the model.
    #
    # Returns a dict with the unchanged `objective` and the model's reply
    # text under "results".
    print(f"Parameters: content={content[:50]}..., objective={objective}")
    extraction_task = "Analyze the following website content and extract a JSON object based on the objective."
    # str.join keeps the original's strictness: a non-string objective or
    # content still raises TypeError instead of being silently coerced.
    user_message = "".join(("Objective: ", objective, "\nContent: ", content))
    analysis = generate_completion("website data extractor", extraction_task, user_message)
    return {"objective": objective, "results": analysis}
def generate_completion(role, task, content):
    """Request a single chat completion from OpenAI and return its text.

    Args:
        role: Persona inserted into the system prompt ("You are a {role}.").
        task: Instruction appended to the system prompt after the persona.
        content: Text sent as the user message.

    Returns:
        The message content of the first choice in the response.
    """
    print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
    system_message = {"role": "system", "content": f"You are a {role}. {task}"}
    user_message = {"role": "user", "content": content}
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )
    first_choice = response.choices[0]
    return first_choice.message.content
def handoff_to_search_google():
    """Hand off the search query to the search google agent."""
    # Docstring kept verbatim: Swarm shows it to the model as the tool
    # description. Returning an Agent is how Swarm transfers control; the
    # module-level name is resolved when the function is called.
    next_agent = google_search_agent
    return next_agent
def handoff_to_map_url():
    """Hand off the url to the map url agent."""
    # Docstring kept verbatim: Swarm shows it to the model as the tool
    # description. Returning an Agent is how Swarm transfers control; the
    # module-level name is resolved when the function is called.
    next_agent = map_url_agent
    return next_agent
def handoff_to_website_scraper():
    """Hand off the url to the website scraper agent."""
    # Docstring kept verbatim: Swarm shows it to the model as the tool
    # description. Returning an Agent is how Swarm transfers control; the
    # module-level name is resolved when the function is called.
    next_agent = website_scraper_agent
    return next_agent
def handoff_to_analyst():
    """Hand off the website content to the analyst agent."""
    # Docstring kept verbatim: Swarm shows it to the model as the tool
    # description. Returning an Agent is how Swarm transfers control; the
    # module-level name is resolved when the function is called.
    next_agent = analyst_agent
    return next_agent
# Agent pipeline, in hand-off order:
#   user interface -> google search -> map url -> website scraper -> analyst
# Each agent gets its own tool function(s) plus, except for the analyst, the
# hand-off function that leads to the next stage. The instruction strings are
# split across lines with implicit concatenation; the runtime text is unchanged.

# Entry point: talks to the user and establishes the extraction objective.
user_interface_agent = Agent(
    name="User Interface Agent",
    instructions=(
        "You are a user interface agent that handles all interactions with the user. "
        "You need to always start with an web data extraction objective that the user "
        "wants to achieve by searching the web, mapping the web pages, and extracting "
        "the content from a specific page. "
        "Be concise."
    ),
    functions=[handoff_to_search_google],
)

# Finds the target website through a Google search.
google_search_agent = Agent(
    name="Google Search Agent",
    instructions=(
        "You are a google search agent specialized in searching the web. "
        "Only search for the website not any specific page. "
        "When you are done, you must hand off to the map agent."
    ),
    functions=[search_google, handoff_to_map_url],
)

# Maps the website's pages to locate the page to scrape.
map_url_agent = Agent(
    name="Map URL Agent",
    instructions=(
        "You are a map url agent specialized in mapping the web pages. "
        "When you are done, you must hand off the results to the website scraper agent."
    ),
    functions=[map_url_pages, handoff_to_website_scraper],
)

# Scrapes the selected page.
website_scraper_agent = Agent(
    name="Website Scraper Agent",
    instructions=(
        "You are a website scraper agent specialized in scraping website content. "
        "When you are done, you must hand off the website content to the analyst agent "
        "to extract the data based on the objective."
    ),
    functions=[scrape_url, handoff_to_analyst],
)

# Final stage: has no hand-off function, so the pipeline ends here.
analyst_agent = Agent(
    name="Analyst Agent",
    instructions=(
        "You are an analyst agent that examines website content and returns a JSON object. "
        "When you are done, you must return a JSON object."
    ),
    functions=[analyze_website_content],
)
if __name__ == "__main__":
    # Start Swarm's interactive demo loop with streaming enabled; the
    # conversation always begins at the user interface agent.
    entry_agent = user_interface_agent
    run_demo_loop(entry_agent, stream=True)

View File

@ -0,0 +1,3 @@
firecrawl-py
openai
serpapi