firecrawl/apps/test-suite/data/scrape.json
2024-05-15 15:50:50 -03:00

119 lines
3.6 KiB
JSON

[
{
"website": "https://www.anthropic.com/claude",
"prompt": "Does this website contain pricing information?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/pricing",
"prompt": "Does this website contain pricing information?",
"expected_output": "yes"
},
{
"website": "https://openai.com/news",
"prompt": "Does this website contain a list of research news?",
"expected_output": "yes"
},
{
"website": "https://agentops.ai",
"prompt": "Does this website contain a code snippets?",
"expected_output": "yes"
},
{
"website": "https://ycombinator.com/companies",
"prompt": "Does this website contain a list bigger than 5 of ycombinator companies?",
"expected_output": "yes"
},
{
"website": "https://firecrawl.dev",
"prompt": "Does this website contain a list bigger than 5 of ycombinator companies?",
"expected_output": "no"
},
{
"website": "https://en.wikipedia.org/wiki/T._N._Seshan",
"prompt": "Does this website talk about Seshan's career?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/blog",
"prompt": "Does this website contain multiple blog articles?",
"expected_output": "yes"
},
{
"website": "https://www.framer.com/pricing",
"prompt": "Is there an enterprise pricing option?",
"expected_output": "yes"
},
{
"website": "https://fly.io/docs/gpus/gpu-quickstart",
"prompt": "Is there a fly deploy command on this page?",
"expected_output": "yes"
},
{
"website": "https://news.ycombinator.com/",
"prompt": "Does this website contain a list of articles in a table markdown format?",
"expected_output": "yes"
},
{
"website": "https://www.vellum.ai/llm-leaderboard",
"prompt": "Does this website contain a model comparison table?",
"expected_output": "yes"
},
{
"website": "https://www.bigbadtoystore.com",
"prompt": "are there more than 3 toys in the new arrivals section?",
"expected_output": "yes"
},
{
"website": "https://www.instructables.com",
"prompt": "Does the site offer more than 5 links about circuits?",
"expected_output": "yes"
},
{
"website": "https://www.powells.com",
"prompt": "is there at least 10 books webpage links?",
"expected_output": "yes"
},
{
"website": "https://www.royalacademy.org.uk",
"prompt": "is there information on upcoming art exhibitions?",
"expected_output": "yes"
},
{
"website": "https://www.eastbaytimes.com",
"prompt": "Is there a Trending Nationally section that lists articles?",
"expected_output": "yes"
},
{
"website": "https://www.manchestereveningnews.co.uk",
"prompt": "is the content focused on Manchester sports news?",
"expected_output": "no"
},
{
"website": "https://physicsworld.com",
"prompt": "does the site provide at least 15 updates on the latest physics research?",
"expected_output": "yes"
},
{
"website": "https://richmondconfidential.org",
"prompt": "does the page contains more than 4 articles?",
"expected_output": "yes"
},
{
"website": "https://www.techinasia.com",
"prompt": "is there at least 10 articles of the startup scene in Asia?",
"expected_output": "yes",
"notes": "The website has a paywall and bot detectors."
},
{
"website": "https://www.boardgamegeek.com",
"prompt": "are there more than 5 board game news?",
"expected_output": "yes"
},
{
"website": "https://www.mountainproject.com",
"prompt": "Are there more than 3 climbing guides for Arizona?",
"expected_output": "yes"
}
]