screenshots almost work

This commit is contained in:
Generic Developer 2024-09-28 01:10:17 +00:00
parent 3d3863f369
commit 1bcfead104
6 changed files with 84 additions and 68 deletions

View File

@ -30,3 +30,6 @@ RUN npm run build
EXPOSE 3000
# Start the application
CMD ["node", "build/server.js"]
# Create local storage directory and set permissions
# NOTE(review): mode 777 makes /app/local-storage writable by every user in the
# container — confirm whether the runtime user actually needs that, or whether
# a chown to that user would be enough.
RUN mkdir -p /app/local-storage && chmod 777 /app/local-storage

View File

@ -29,6 +29,8 @@ const md5Hasher = new HashManager('md5', 'hex');
// const logger = new Logger('Crawler');
import { TransferProtocolMetadata } from 'civkit';
import * as fs from 'fs';
import * as path from 'path';
function sendResponse<T>(res: Response, data: T, meta: TransferProtocolMetadata): T {
if (meta.code) {
@ -328,22 +330,19 @@ export class CrawlerHost extends RPCHost {
pageshotUrl?: string;
}, nominalUrl?: URL) {
console.log('Formatting snapshot', { mode, url: nominalUrl?.toString() });
const host = this.threadLocal.get('host') || '192.168.178.100:1337';
if (mode === 'screenshot') {
if (snapshot.screenshot && !snapshot.screenshotUrl) {
console.log('Saving screenshot');
const fid = `instant-screenshots/${randomUUID()}`;
await this.firebaseObjectStorage.saveFile(fid, snapshot.screenshot, {
metadata: {
contentType: 'image/png',
}
});
snapshot.screenshotUrl = await this.firebaseObjectStorage.signDownloadUrl(fid, Date.now() + this.urlValidMs);
const fileName = `screenshot-${randomUUID()}.png`;
const filePath = await this.saveFileLocally(fileName, snapshot.screenshot);
snapshot.screenshotUrl = `http://${host}/instant-screenshots/${fileName}`;
console.log('Screenshot saved and URL generated', { screenshotUrl: snapshot.screenshotUrl });
}
return {
...this.getGeneralSnapshotMixins(snapshot),
// html: snapshot.html,
screenshotUrl: snapshot.screenshotUrl,
toString() {
return this.screenshotUrl;
@ -353,13 +352,9 @@ export class CrawlerHost extends RPCHost {
if (mode === 'pageshot') {
if (snapshot.pageshot && !snapshot.pageshotUrl) {
console.log('Saving pageshot');
const fid = `instant-screenshots/${randomUUID()}`;
await this.firebaseObjectStorage.saveFile(fid, snapshot.pageshot, {
metadata: {
contentType: 'image/png',
}
});
snapshot.pageshotUrl = await this.firebaseObjectStorage.signDownloadUrl(fid, Date.now() + this.urlValidMs);
const fileName = `pageshot-${randomUUID()}.png`;
const filePath = await this.saveFileLocally(fileName, snapshot.pageshot);
snapshot.pageshotUrl = `http://${host}/instant-screenshots/${fileName}`;
console.log('Pageshot saved and URL generated', { pageshotUrl: snapshot.pageshotUrl });
}
@ -647,24 +642,28 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
let urlToCrawl;
const normalizeUrl = (await pNormalizeUrl).default;
try {
urlToCrawl = new URL(
normalizeUrl(
(crawlerOptions.url || noSlashURL).trim(),
{
stripWWW: false,
removeTrailingSlash: false,
removeSingleSlash: false,
sortQueryParameters: false,
}
)
);
console.log('Normalized URL to crawl:', urlToCrawl.toString());
const urlParam = req.query.url || req.url.slice(1);
const urlToNormalize = Array.isArray(urlParam) ? urlParam[0] : urlParam;
if (typeof urlToNormalize === 'string' && !urlToNormalize.startsWith('favicon.ico')) {
urlToCrawl = new URL(
normalizeUrl(
urlToNormalize.trim(),
{
stripWWW: false,
removeTrailingSlash: false,
removeSingleSlash: false,
sortQueryParameters: false,
}
)
);
console.log('Normalized URL to crawl:', urlToCrawl.toString());
} else {
console.log('Skipping invalid or favicon URL:', urlToNormalize);
return sendResponse(res, 'Skipped', { contentType: 'text/plain', envelope: null });
}
} catch (err) {
console.error('Error normalizing URL:', err);
throw new ParamValidationError({
message: `${err}`,
path: 'url'
});
return sendResponse(res, 'Invalid URL', { contentType: 'text/plain', envelope: null, code: 400 });
}
if (urlToCrawl.protocol !== 'http:' && urlToCrawl.protocol !== 'https:') {
console.error('Invalid protocol:', urlToCrawl.protocol);
@ -873,19 +872,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
this.threadLocal.set('keepImgDataUrl', opts.keepImgDataUrl);
this.threadLocal.set('cacheTolerance', opts.cacheTolerance);
this.threadLocal.set('userAgent', opts.userAgent);
this.threadLocal.set('host', req.headers.host || '192.168.178.100:1337');
if (opts.timeout) {
this.threadLocal.set('timeout', opts.timeout * 1000);
}
const cookies = req.headers['x-set-cookie'] ?
(Array.isArray(req.headers['x-set-cookie']) ? req.headers['x-set-cookie'] : [req.headers['x-set-cookie']])
.flatMap(cookieString =>
cookieString.split(';').map(cookie => {
const [name, ...valueParts] = cookie.trim().split('=');
const value = valueParts.join('=');
return { name, value, url: urlToCrawl.toString() };
})
)
.map(cookie => {
const [name, value] = cookie.split('=');
return { name, value, url: urlToCrawl.toString() };
})
: [];
console.log('Cookies:', cookies);
@ -937,4 +934,23 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
return this.formatSnapshot(mode, lastSnapshot, url);
}
/**
 * Writes `content` to `/app/local-storage/instant-screenshots/<fileName>`.
 *
 * The target directory is created on demand. Any filesystem error is logged
 * and then rethrown unchanged so the caller can decide how to respond.
 *
 * @param fileName - Name of the file to create inside the screenshots directory.
 * @param content - Raw bytes to write.
 * @returns The full path of the file that was written.
 * @throws Whatever error the underlying `fs` call rejected with.
 */
async saveFileLocally(fileName: string, content: Buffer): Promise<string> {
  const localDir = path.join('/app', 'local-storage', 'instant-screenshots');
  console.log(`Attempting to save file in directory: ${localDir}`);
  try {
    // A recursive mkdir succeeds when the directory already exists, so no
    // separate existence check is needed. That also avoids the
    // check-then-create race between concurrent requests and keeps the
    // event loop free of synchronous filesystem calls.
    await fs.promises.mkdir(localDir, { recursive: true });

    const filePath = path.join(localDir, fileName);
    console.log(`Writing file to: ${filePath}`);
    await fs.promises.writeFile(filePath, content);
    console.log(`File successfully written to: ${filePath}`);
    return filePath;
  } catch (error) {
    console.error(`Error saving file locally: ${error}`);
    throw error;
  }
}
}

View File

@ -1,22 +1,21 @@
import "reflect-metadata"
import 'reflect-metadata';
import express from 'express';
import { container } from 'tsyringe';
import { CrawlerHost } from './cloud-functions/crawler';
import path from 'path';
const app = express();
const port = process.env.PORT || 3000;
container.registerSingleton(CrawlerHost);
const crawlerHost = container.resolve(CrawlerHost);
app.use(express.json());
// Example curl for /crawl:
// curl -X GET "http://localhost:3000/https://example.com"
app.get('/:url(*)', async (req, res) => {
// Serve static files from the local-storage directory
app.use('/instant-screenshots', express.static(path.join('/app', 'local-storage', 'instant-screenshots')));
app.all('*', async (req, res) => {
try {
const url = req.params.url;
await crawlerHost.crawl(req, res);
} catch (error) {
console.error('Error during crawl:', error);
@ -24,14 +23,8 @@ app.get('/:url(*)', async (req, res) => {
}
});
// Example curl for /hello:
// curl -X GET "http://localhost:3000/hello"
app.get('/hello', (req, res) => {
res.json({ message: 'Hello, World!' });
});
app.listen(port, () => {
console.log(`Server is running on port ${port}`);
});
export default app;
export default app;

View File

@ -4,11 +4,12 @@ import { Logger } from './logger';
import { OutputServerEventStream } from './output-stream';
import { RPCReflect } from './rpc-reflect';
import { injectable } from 'tsyringe';
import * as fs from 'fs';
import * as path from 'path';
@injectable()
export class AsyncContext {
private storage: Map<string, any> = new Map();
set(key: string, value: any) {
this.storage.set(key, value);
}
@ -33,41 +34,44 @@ export function Param(name: string, options?: any): ParameterDecorator {
@injectable()
/**
 * Local-filesystem stand-in for the Firebase storage bucket wrapper.
 *
 * Every storage key is resolved relative to `localStorageDir`
 * (`/app/local-storage`), and the URLs handed back to callers are `file://`
 * URLs pointing into that directory.
 */
export class FirebaseStorageBucketControl {
  /**
   * Stub bucket handle: `file(name).exists()` always resolves to `[true]`,
   * regardless of what is on disk.
   */
  bucket: any;
  private localStorageDir: string;

  constructor() {
    this.bucket = {
      file: (fileName: string) => ({
        exists: async () => [true]
      })
    };
    this.localStorageDir = path.join('/app', 'local-storage');
    // Recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(this.localStorageDir, { recursive: true });
  }

  /** Maps a storage key to its location under the local storage root. */
  private resolveLocalPath(key: string): string {
    return path.join(this.localStorageDir, key);
  }

  /**
   * Copies a file into local storage.
   *
   * @param filePath - Path of the existing file to copy from.
   * @param destination - Storage key (relative to the storage root) to copy to.
   * @returns A `file://` URL for the stored copy.
   */
  async uploadFile(filePath: string, destination: string): Promise<string> {
    const destPath = this.resolveLocalPath(destination);
    // Keys may contain sub-directories; make sure the parent exists first.
    await fs.promises.mkdir(path.dirname(destPath), { recursive: true });
    await fs.promises.copyFile(filePath, destPath);
    return `file://${destPath}`;
  }

  /**
   * Copies a stored file out of local storage.
   *
   * @param filePath - Storage key of the file to read.
   * @param destination - Filesystem path to copy the file to.
   */
  async downloadFile(filePath: string, destination: string): Promise<void> {
    const sourcePath = this.resolveLocalPath(filePath);
    await fs.promises.copyFile(sourcePath, destination);
  }

  /**
   * Removes a stored file. Rejects if the file cannot be unlinked.
   *
   * @param filePath - Storage key of the file to delete.
   */
  async deleteFile(filePath: string): Promise<void> {
    await fs.promises.unlink(this.resolveLocalPath(filePath));
  }

  /**
   * Reports whether a storage key currently exists on disk.
   *
   * @param filePath - Storage key to check.
   * @returns `true` when the path is accessible, `false` otherwise.
   */
  async fileExists(filePath: string): Promise<boolean> {
    try {
      await fs.promises.access(this.resolveLocalPath(filePath));
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Writes `content` under the given storage key, creating parent
   * directories as needed.
   *
   * @param filePath - Storage key to write to.
   * @param content - Raw bytes to store.
   * @param options - Accepted for interface compatibility; not used here.
   */
  async saveFile(filePath: string, content: Buffer, options?: any): Promise<void> {
    const fullPath = this.resolveLocalPath(filePath);
    // Keys such as `instant-screenshots/<id>` need their parent directory.
    await fs.promises.mkdir(path.dirname(fullPath), { recursive: true });
    await fs.promises.writeFile(fullPath, content);
  }

  /**
   * Builds the URL for a stored file.
   *
   * @param filePath - Storage key of the file.
   * @param expirationTime - Accepted for interface compatibility; not used here.
   * @returns A `file://` URL for the stored file.
   */
  async signDownloadUrl(filePath: string, expirationTime: number): Promise<string> {
    return `file://${this.resolveLocalPath(filePath)}`;
  }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB