mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #109 from mendableai/feat/posthog-logging
Add Posthog Logging
This commit is contained in:
commit
784b81e6d6
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
|
@ -13,6 +13,8 @@ env:
|
|||
HOST: ${{ secrets.HOST }}
|
||||
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
|
||||
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
|
||||
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
|
||||
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
|
||||
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
|
||||
|
@ -38,7 +40,7 @@ jobs:
|
|||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '20'
|
||||
node-version: "20"
|
||||
- name: Install pnpm
|
||||
run: npm install -g pnpm
|
||||
- name: Install dependencies
|
||||
|
|
5
.github/workflows/fly.yml
vendored
5
.github/workflows/fly.yml
vendored
|
@ -13,6 +13,8 @@ env:
|
|||
HOST: ${{ secrets.HOST }}
|
||||
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
|
||||
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
|
||||
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
|
||||
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
|
||||
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
|
||||
|
@ -38,7 +40,7 @@ jobs:
|
|||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '20'
|
||||
node-version: "20"
|
||||
- name: Install pnpm
|
||||
run: npm install -g pnpm
|
||||
- name: Install dependencies
|
||||
|
@ -68,4 +70,3 @@ jobs:
|
|||
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
|
||||
env:
|
||||
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||
|
||||
|
|
|
@ -4,20 +4,20 @@ Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions o
|
|||
|
||||
If you're contributing, note that the process is similar to other open source repos, i.e. fork firecrawl, make changes, run tests, and open a PR. If you have any questions or would like help getting on board, reach out to hello@mendable.ai for more information or submit an issue!
|
||||
|
||||
|
||||
## Running the project locally
|
||||
|
||||
First, start by installing dependencies
|
||||
|
||||
1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs)
|
||||
2. pnpm [instructions](https://pnpm.io/installation)
|
||||
3. redis [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/)
|
||||
|
||||
|
||||
Set environment variables in a .env file in the /apps/api/ directory; you can copy over the template in .env.example.
|
||||
Set environment variables in a .env file in the /apps/api/ directory; you can copy over the template in .env.example.
|
||||
|
||||
To start, we won't set up authentication or any optional sub-services (PDF parsing, JS blocking support, AI features).
|
||||
|
||||
.env:
|
||||
|
||||
```
|
||||
# ===== Required ENVS ======
|
||||
NUM_WORKERS_PER_QUEUE=8
|
||||
|
@ -43,6 +43,11 @@ BULL_AUTH_KEY= #
|
|||
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
|
||||
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
|
||||
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
|
||||
SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
|
||||
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
|
||||
POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
|
||||
POSTHOG_HOST= # set if you'd like to send posthog events like job logs
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
@ -69,6 +74,7 @@ redis-server
|
|||
### Terminal 2 - setting up workers
|
||||
|
||||
Now, navigate to the apps/api/ directory and run:
|
||||
|
||||
```bash
|
||||
pnpm run workers
|
||||
```
|
||||
|
@ -77,7 +83,6 @@ This will start the workers who are responsible for processing crawl jobs.
|
|||
|
||||
### Terminal 3 - setting up the main server
|
||||
|
||||
|
||||
To do this, navigate to the apps/api/ directory. If you don’t have pnpm already, install it here: https://pnpm.io/installation
|
||||
Next, run your server with:
|
||||
|
||||
|
@ -92,8 +97,8 @@ Alright: now let’s send our first request.
|
|||
```curl
|
||||
curl -X GET http://localhost:3002/test
|
||||
```
|
||||
This should return the response Hello, world!
|
||||
|
||||
This should return the response Hello, world!
|
||||
|
||||
If you’d like to test the crawl endpoint, you can run this
|
||||
|
||||
|
@ -110,5 +115,3 @@ curl -X POST http://localhost:3002/v0/crawl \
|
|||
The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.
|
||||
|
||||
If you'd like to run the tests with authentication, run `npm run test:prod`
|
||||
|
||||
|
||||
|
|
|
@ -24,3 +24,6 @@ PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
|
|||
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
|
||||
SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
|
||||
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
|
||||
POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
|
||||
POSTHOG_HOST= # set if you'd like to send posthog events like job logs
|
||||
|
||||
|
|
|
@ -82,6 +82,7 @@
|
|||
"openai": "^4.28.4",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"pos": "^0.4.2",
|
||||
"posthog-node": "^4.0.1",
|
||||
"promptable": "^0.0.9",
|
||||
"puppeteer": "^22.6.3",
|
||||
"rate-limiter-flexible": "^2.4.2",
|
||||
|
|
|
@ -128,6 +128,9 @@ dependencies:
|
|||
pos:
|
||||
specifier: ^0.4.2
|
||||
version: 0.4.2
|
||||
posthog-node:
|
||||
specifier: ^4.0.1
|
||||
version: 4.0.1
|
||||
promptable:
|
||||
specifier: ^0.0.9
|
||||
version: 0.0.9
|
||||
|
@ -5068,6 +5071,16 @@ packages:
|
|||
source-map-js: 1.0.2
|
||||
dev: false
|
||||
|
||||
/posthog-node@4.0.1:
|
||||
resolution: {integrity: sha512-rtqm2h22QxLGBrW2bLYzbRhliIrqgZ0k+gF0LkQ1SNdeD06YE5eilV0MxZppFSxC8TfH0+B0cWCuebEnreIDgQ==}
|
||||
engines: {node: '>=15.0.0'}
|
||||
dependencies:
|
||||
axios: 1.6.7
|
||||
rusha: 0.8.14
|
||||
transitivePeerDependencies:
|
||||
- debug
|
||||
dev: false
|
||||
|
||||
/prelude-ls@1.1.2:
|
||||
resolution: {integrity: sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==}
|
||||
engines: {node: '>= 0.8.0'}
|
||||
|
@ -5330,6 +5343,10 @@ packages:
|
|||
engines: {node: '>=10.0.0'}
|
||||
dev: false
|
||||
|
||||
/rusha@0.8.14:
|
||||
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
|
||||
dev: false
|
||||
|
||||
/safe-buffer@5.2.1:
|
||||
resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
|
||||
|
||||
|
|
|
@ -25,6 +25,8 @@ describe("E2E Tests for API Routes with No Authentication", () => {
|
|||
process.env.PLAYWRIGHT_MICROSERVICE_URL = "";
|
||||
process.env.LLAMAPARSE_API_KEY = "";
|
||||
process.env.TEST_API_KEY = "";
|
||||
process.env.POSTHOG_API_KEY = "";
|
||||
process.env.POSTHOG_HOST = "";
|
||||
});
|
||||
|
||||
// restore original process.env
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import { ExtractorOptions } from './../../lib/entities';
|
||||
import { supabase_service } from "../supabase";
|
||||
import { FirecrawlJob } from "../../types";
|
||||
import { posthog } from "../posthog";
|
||||
import "dotenv/config";
|
||||
|
||||
export async function logJob(job: FirecrawlJob) {
|
||||
|
@ -10,7 +11,6 @@ export async function logJob(job: FirecrawlJob) {
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
const { data, error } = await supabase_service
|
||||
.from("firecrawl_jobs")
|
||||
.insert([
|
||||
|
@ -30,6 +30,27 @@ export async function logJob(job: FirecrawlJob) {
|
|||
num_tokens: job.num_tokens
|
||||
},
|
||||
]);
|
||||
|
||||
if (process.env.POSTHOG_API_KEY) {
|
||||
posthog.capture({
|
||||
distinctId: job.team_id === "preview" ? null : job.team_id,
|
||||
event: "job-logged",
|
||||
properties: {
|
||||
success: job.success,
|
||||
message: job.message,
|
||||
num_docs: job.num_docs,
|
||||
time_taken: job.time_taken,
|
||||
team_id: job.team_id === "preview" ? null : job.team_id,
|
||||
mode: job.mode,
|
||||
url: job.url,
|
||||
crawler_options: job.crawlerOptions,
|
||||
page_options: job.pageOptions,
|
||||
origin: job.origin,
|
||||
extractor_options: job.extractor_options,
|
||||
num_tokens: job.num_tokens
|
||||
},
|
||||
});
|
||||
}
|
||||
if (error) {
|
||||
console.error("Error logging job:\n", error);
|
||||
}
|
||||
|
|
26
apps/api/src/services/posthog.ts
Normal file
26
apps/api/src/services/posthog.ts
Normal file
|
@ -0,0 +1,26 @@
|
|||
import { PostHog } from 'posthog-node';
|
||||
import "dotenv/config";
|
||||
|
||||
/**
 * Builds a PostHog client configured from the environment.
 *
 * Reads `POSTHOG_API_KEY` as the project key and `POSTHOG_HOST` as the host.
 * The client is created with `flushAt: 1` and `flushInterval: 0`
 * (NOTE(review): presumably so each captured event is sent right away instead
 * of being batched — confirm against the posthog-node option docs).
 *
 * @returns A newly constructed `PostHog` instance.
 */
export default function PostHogClient() {
  return new PostHog(process.env.POSTHOG_API_KEY, {
    host: process.env.POSTHOG_HOST,
    flushAt: 1,
    flushInterval: 0,
  });
}
|
||||
|
||||
/**
 * No-op stand-in for the PostHog client.
 *
 * It exposes the same `capture` entry point that callers use on the real
 * client, so event-logging code can run unchanged when analytics is disabled.
 */
class MockPostHog {
  /**
   * Accepts whatever event payload the caller passes and discards it.
   *
   * The parameters are variadic and untyped on purpose: callers invoke
   * `capture({ distinctId, event, properties })`, and the mock must accept
   * that call shape without inspecting or validating it.
   */
  capture(..._args: unknown[]): void {
    // Intentionally empty: nothing is recorded or sent.
  }
}
|
||||
|
||||
// Decide once, at module load, whether analytics is enabled: a non-empty
// POSTHOG_API_KEY selects the real client, anything else selects the mock.
const hasPosthogKey = Boolean(process.env.POSTHOG_API_KEY);

// Warn up front when events are going to be dropped so the missing key is
// visible in the terminal rather than failing silently.
if (!hasPosthogKey) {
  console.warn(
    "POSTHOG_API_KEY is not provided - your events will not be logged. Using MockPostHog as a fallback. See posthog.ts for more."
  );
}

/** Shared analytics client: the real PostHog client when configured, otherwise a no-op mock. */
export const posthog = hasPosthogKey ? PostHogClient() : new MockPostHog();
|
Loading…
Reference in New Issue
Block a user