mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-15 19:22:20 +08:00
screenshots almost work
This commit is contained in:
parent
3d3863f369
commit
1bcfead104
|
@ -30,3 +30,6 @@ RUN npm run build
|
|||
EXPOSE 3000
|
||||
# Start the application
|
||||
CMD ["node", "build/server.js"]
|
||||
|
||||
# Create local storage directory and set permissions
|
||||
RUN mkdir -p /app/local-storage && chmod 777 /app/local-storage
|
||||
|
|
|
@ -29,6 +29,8 @@ const md5Hasher = new HashManager('md5', 'hex');
|
|||
// const logger = new Logger('Crawler');
|
||||
|
||||
import { TransferProtocolMetadata } from 'civkit';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
function sendResponse<T>(res: Response, data: T, meta: TransferProtocolMetadata): T {
|
||||
if (meta.code) {
|
||||
|
@ -328,22 +330,19 @@ export class CrawlerHost extends RPCHost {
|
|||
pageshotUrl?: string;
|
||||
}, nominalUrl?: URL) {
|
||||
console.log('Formatting snapshot', { mode, url: nominalUrl?.toString() });
|
||||
const host = this.threadLocal.get('host') || '192.168.178.100:1337';
|
||||
|
||||
if (mode === 'screenshot') {
|
||||
if (snapshot.screenshot && !snapshot.screenshotUrl) {
|
||||
console.log('Saving screenshot');
|
||||
const fid = `instant-screenshots/${randomUUID()}`;
|
||||
await this.firebaseObjectStorage.saveFile(fid, snapshot.screenshot, {
|
||||
metadata: {
|
||||
contentType: 'image/png',
|
||||
}
|
||||
});
|
||||
snapshot.screenshotUrl = await this.firebaseObjectStorage.signDownloadUrl(fid, Date.now() + this.urlValidMs);
|
||||
const fileName = `screenshot-${randomUUID()}.png`;
|
||||
const filePath = await this.saveFileLocally(fileName, snapshot.screenshot);
|
||||
snapshot.screenshotUrl = `http://${host}/instant-screenshots/${fileName}`;
|
||||
console.log('Screenshot saved and URL generated', { screenshotUrl: snapshot.screenshotUrl });
|
||||
}
|
||||
|
||||
return {
|
||||
...this.getGeneralSnapshotMixins(snapshot),
|
||||
// html: snapshot.html,
|
||||
screenshotUrl: snapshot.screenshotUrl,
|
||||
toString() {
|
||||
return this.screenshotUrl;
|
||||
|
@ -353,13 +352,9 @@ export class CrawlerHost extends RPCHost {
|
|||
if (mode === 'pageshot') {
|
||||
if (snapshot.pageshot && !snapshot.pageshotUrl) {
|
||||
console.log('Saving pageshot');
|
||||
const fid = `instant-screenshots/${randomUUID()}`;
|
||||
await this.firebaseObjectStorage.saveFile(fid, snapshot.pageshot, {
|
||||
metadata: {
|
||||
contentType: 'image/png',
|
||||
}
|
||||
});
|
||||
snapshot.pageshotUrl = await this.firebaseObjectStorage.signDownloadUrl(fid, Date.now() + this.urlValidMs);
|
||||
const fileName = `pageshot-${randomUUID()}.png`;
|
||||
const filePath = await this.saveFileLocally(fileName, snapshot.pageshot);
|
||||
snapshot.pageshotUrl = `http://${host}/instant-screenshots/${fileName}`;
|
||||
console.log('Pageshot saved and URL generated', { pageshotUrl: snapshot.pageshotUrl });
|
||||
}
|
||||
|
||||
|
@ -647,24 +642,28 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||
let urlToCrawl;
|
||||
const normalizeUrl = (await pNormalizeUrl).default;
|
||||
try {
|
||||
urlToCrawl = new URL(
|
||||
normalizeUrl(
|
||||
(crawlerOptions.url || noSlashURL).trim(),
|
||||
{
|
||||
stripWWW: false,
|
||||
removeTrailingSlash: false,
|
||||
removeSingleSlash: false,
|
||||
sortQueryParameters: false,
|
||||
}
|
||||
)
|
||||
);
|
||||
console.log('Normalized URL to crawl:', urlToCrawl.toString());
|
||||
const urlParam = req.query.url || req.url.slice(1);
|
||||
const urlToNormalize = Array.isArray(urlParam) ? urlParam[0] : urlParam;
|
||||
if (typeof urlToNormalize === 'string' && !urlToNormalize.startsWith('favicon.ico')) {
|
||||
urlToCrawl = new URL(
|
||||
normalizeUrl(
|
||||
urlToNormalize.trim(),
|
||||
{
|
||||
stripWWW: false,
|
||||
removeTrailingSlash: false,
|
||||
removeSingleSlash: false,
|
||||
sortQueryParameters: false,
|
||||
}
|
||||
)
|
||||
);
|
||||
console.log('Normalized URL to crawl:', urlToCrawl.toString());
|
||||
} else {
|
||||
console.log('Skipping invalid or favicon URL:', urlToNormalize);
|
||||
return sendResponse(res, 'Skipped', { contentType: 'text/plain', envelope: null });
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('Error normalizing URL:', err);
|
||||
throw new ParamValidationError({
|
||||
message: `${err}`,
|
||||
path: 'url'
|
||||
});
|
||||
return sendResponse(res, 'Invalid URL', { contentType: 'text/plain', envelope: null, code: 400 });
|
||||
}
|
||||
if (urlToCrawl.protocol !== 'http:' && urlToCrawl.protocol !== 'https:') {
|
||||
console.error('Invalid protocol:', urlToCrawl.protocol);
|
||||
|
@ -873,19 +872,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||
this.threadLocal.set('keepImgDataUrl', opts.keepImgDataUrl);
|
||||
this.threadLocal.set('cacheTolerance', opts.cacheTolerance);
|
||||
this.threadLocal.set('userAgent', opts.userAgent);
|
||||
this.threadLocal.set('host', req.headers.host || '192.168.178.100:1337');
|
||||
if (opts.timeout) {
|
||||
this.threadLocal.set('timeout', opts.timeout * 1000);
|
||||
}
|
||||
|
||||
const cookies = req.headers['x-set-cookie'] ?
|
||||
(Array.isArray(req.headers['x-set-cookie']) ? req.headers['x-set-cookie'] : [req.headers['x-set-cookie']])
|
||||
.flatMap(cookieString =>
|
||||
cookieString.split(';').map(cookie => {
|
||||
const [name, ...valueParts] = cookie.trim().split('=');
|
||||
const value = valueParts.join('=');
|
||||
return { name, value, url: urlToCrawl.toString() };
|
||||
})
|
||||
)
|
||||
.map(cookie => {
|
||||
const [name, value] = cookie.split('=');
|
||||
return { name, value, url: urlToCrawl.toString() };
|
||||
})
|
||||
: [];
|
||||
|
||||
console.log('Cookies:', cookies);
|
||||
|
@ -937,4 +934,23 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||
|
||||
return this.formatSnapshot(mode, lastSnapshot, url);
|
||||
}
|
||||
|
||||
/**
 * Writes a binary payload into the local `instant-screenshots` storage directory.
 *
 * The directory (`/app/local-storage/instant-screenshots`) is created on demand
 * before the file is written.
 *
 * @param fileName - Name of the file to create inside the storage directory.
 * @param content - Raw bytes to write to the file.
 * @returns The full path of the file that was written.
 * @throws Rethrows any file-system error after logging it.
 */
async saveFileLocally(fileName: string, content: Buffer): Promise<string> {
    const localDir = path.join('/app', 'local-storage', 'instant-screenshots');
    console.log(`Attempting to save file in directory: ${localDir}`);
    try {
        // With `recursive: true`, mkdir succeeds when the directory already exists,
        // so no separate (blocking, race-prone) existence check is needed. It resolves
        // to the first directory it had to create, or `undefined` if nothing was created.
        const createdDir = await fs.promises.mkdir(localDir, { recursive: true });
        if (createdDir !== undefined) {
            console.log(`Directory ${localDir} does not exist. Creating it.`);
        }

        const filePath = path.join(localDir, fileName);
        console.log(`Writing file to: ${filePath}`);
        await fs.promises.writeFile(filePath, content);
        console.log(`File successfully written to: ${filePath}`);
        return filePath;
    } catch (error) {
        // Log for diagnosis, then let the caller decide how to surface the failure.
        console.error(`Error saving file locally: ${error}`);
        throw error;
    }
}
|
||||
}
|
||||
|
|
|
@ -1,22 +1,21 @@
|
|||
import "reflect-metadata"
|
||||
import 'reflect-metadata';
|
||||
import express from 'express';
|
||||
import { container } from 'tsyringe';
|
||||
import { CrawlerHost } from './cloud-functions/crawler';
|
||||
import path from 'path';
|
||||
|
||||
const app = express();
|
||||
const port = process.env.PORT || 3000;
|
||||
|
||||
container.registerSingleton(CrawlerHost);
|
||||
|
||||
const crawlerHost = container.resolve(CrawlerHost);
|
||||
|
||||
app.use(express.json());
|
||||
|
||||
// Example curl for /crawl:
|
||||
// curl -X GET "http://localhost:3000/https://example.com"
|
||||
app.get('/:url(*)', async (req, res) => {
|
||||
// Serve static files from the local-storage directory
|
||||
app.use('/instant-screenshots', express.static(path.join('/app', 'local-storage', 'instant-screenshots')));
|
||||
|
||||
app.all('*', async (req, res) => {
|
||||
try {
|
||||
const url = req.params.url;
|
||||
await crawlerHost.crawl(req, res);
|
||||
} catch (error) {
|
||||
console.error('Error during crawl:', error);
|
||||
|
@ -24,14 +23,8 @@ app.get('/:url(*)', async (req, res) => {
|
|||
}
|
||||
});
|
||||
|
||||
// Example curl for /hello:
|
||||
// curl -X GET "http://localhost:3000/hello"
|
||||
app.get('/hello', (req, res) => {
|
||||
res.json({ message: 'Hello, World!' });
|
||||
});
|
||||
|
||||
app.listen(port, () => {
|
||||
console.log(`Server is running on port ${port}`);
|
||||
});
|
||||
|
||||
export default app;
|
||||
export default app;
|
||||
|
|
|
@ -4,11 +4,12 @@ import { Logger } from './logger';
|
|||
import { OutputServerEventStream } from './output-stream';
|
||||
import { RPCReflect } from './rpc-reflect';
|
||||
import { injectable } from 'tsyringe';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
@injectable()
|
||||
export class AsyncContext {
|
||||
private storage: Map<string, any> = new Map();
|
||||
|
||||
set(key: string, value: any) {
|
||||
this.storage.set(key, value);
|
||||
}
|
||||
|
@ -33,41 +34,44 @@ export function Param(name: string, options?: any): ParameterDecorator {
|
|||
|
||||
@injectable()
|
||||
export class FirebaseStorageBucketControl {
|
||||
bucket: any;
|
||||
private localStorageDir: string;
|
||||
|
||||
constructor() {
|
||||
this.bucket = {
|
||||
file: (fileName: string) => ({
|
||||
exists: async () => [true]
|
||||
})
|
||||
};
|
||||
this.localStorageDir = path.join('/app', 'local-storage');
|
||||
if (!fs.existsSync(this.localStorageDir)) {
|
||||
fs.mkdirSync(this.localStorageDir, { recursive: true });
|
||||
}
|
||||
}
|
||||
|
||||
async uploadFile(filePath: string, destination: string): Promise<string> {
|
||||
console.log(`Mock: Uploading file from ${filePath} to ${destination}`);
|
||||
return `https://storage.googleapis.com/mock-bucket/${destination}`;
|
||||
const destPath = path.join(this.localStorageDir, destination);
|
||||
await fs.promises.copyFile(filePath, destPath);
|
||||
return `file://${destPath}`;
|
||||
}
|
||||
|
||||
async downloadFile(filePath: string, destination: string): Promise<void> {
|
||||
console.log(`Mock: Downloading file from ${filePath} to ${destination}`);
|
||||
const sourcePath = path.join(this.localStorageDir, filePath);
|
||||
await fs.promises.copyFile(sourcePath, destination);
|
||||
}
|
||||
|
||||
async deleteFile(filePath: string): Promise<void> {
|
||||
console.log(`Mock: Deleting file ${filePath}`);
|
||||
const fullPath = path.join(this.localStorageDir, filePath);
|
||||
await fs.promises.unlink(fullPath);
|
||||
}
|
||||
|
||||
async fileExists(filePath: string): Promise<boolean> {
|
||||
console.log(`Mock: Checking if file ${filePath} exists`);
|
||||
return true;
|
||||
const fullPath = path.join(this.localStorageDir, filePath);
|
||||
return fs.existsSync(fullPath);
|
||||
}
|
||||
|
||||
async saveFile(filePath: string, content: Buffer, options?: any): Promise<void> {
|
||||
console.log(`Mock: Saving file ${filePath}`);
|
||||
const fullPath = path.join(this.localStorageDir, filePath);
|
||||
await fs.promises.writeFile(fullPath, content);
|
||||
}
|
||||
|
||||
async signDownloadUrl(filePath: string, expirationTime: number): Promise<string> {
|
||||
console.log(`Mock: Signing download URL for ${filePath}`);
|
||||
return `https://storage.googleapis.com/mock-bucket/${filePath}?token=mock-signed-url`;
|
||||
const fullPath = path.join(this.localStorageDir, filePath);
|
||||
return `file://${fullPath}`;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 151 KiB |
Binary file not shown.
After Width: | Height: | Size: 151 KiB |
Loading…
Reference in New Issue
Block a user