This commit is contained in:
Generic Developer 2024-09-28 02:31:42 +00:00
parent 025c8b67b0
commit 9b0a0b91b8
4 changed files with 17 additions and 6 deletions

View File

@@ -1,5 +1,6 @@
# Use Node.js 18 slim image (Debian-based)
FROM node:18-slim
# Install necessary tools and libraries
RUN apt-get update && apt-get install -y \
chromium \
@@ -13,23 +14,31 @@ RUN apt-get update && apt-get install -y \
&& apt-get update \
&& apt-get install -y google-chrome-stable \
&& rm -rf /var/lib/apt/lists/*
# Set environment variables
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
ENV PUPPETEER_EXECUTABLE_PATH /usr/bin/google-chrome-stable
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
# Set working directory
WORKDIR /app
# Copy package.json and package-lock.json
COPY backend/functions/package*.json ./
# Install dependencies
RUN npm ci
# Copy the rest of the application code
COPY backend/functions .
# Build the application
RUN npm run build
# Expose the port the app runs on
EXPOSE 3000
# Start the application
CMD ["node", "build/server.js"]
# Create local storage directory and set permissions
RUN mkdir -p /app/local-storage && chmod 777 /app/local-storage
# Expose the port the app runs on
EXPOSE 3000
# Start the application
CMD ["node", "build/server.js"]

View File

@@ -612,6 +612,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
async crawl(req: Request, res: Response) {
this.logger.info(`Crawl request received for URL: ${req.url}`);
console.log('Crawl method called with request:', req.url);
const ctx = { req, res };
console.log(`req.headers: ${JSON.stringify(req.headers)}`);
@@ -730,6 +731,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
async *scrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, crawlerOpts?: CrawlerOptions) {
this.logger.info(`Starting scrap for URL: ${urlToCrawl.toString()}`);
console.log('Starting scrap for URL:', urlToCrawl.toString());
console.log('Crawl options:', crawlOpts);
console.log('Crawler options:', crawlerOpts);

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB