How to do web scraping with Next.js and Puppeteer
Unanswered
Bishal Saha posted this in #help-forum
I am trying to do web scraping with Next.js. It works in my local environment, but when deployed through Vercel it fails badly, returning a 404 response page. Is there anything in particular I am missing, or can it not be done due to platform limitations?
I tried puppeteer-core and chrome-aws-lambda, but neither of them works either: the request either fails outright or returns a 500 error.
Here is my code sample that works on local but fails in deployment with 404:
// pages/api/scrape.js
import puppeteer from 'puppeteer';
/**
 * Next.js API route: loads the page given by the `url` query parameter in a
 * headless browser and responds with the text of its `<body>`.
 *
 * Responses:
 *   - 400 `{ error }` when `url` is missing, is not a single string, cannot be
 *     parsed, or is not an http(s) URL.
 *   - 200 `{ text }` with `document.body.innerText` of the loaded page.
 *   - 500 `{ error }` when launching the browser or loading the page fails.
 *
 * NOTE(review): `puppeteer.launch()` needs a Chromium binary on the host.
 * Serverless platforms presumably do not ship one, which would explain why
 * this works locally but not once deployed. Confirm against the hosting
 * platform's Puppeteer guide.
 *
 * @param {object} req - Incoming request; only `req.query.url` is read.
 * @param {object} res - Response object; `status()` and `json()` are used.
 * @returns {Promise<*>} Resolves once a response has been sent.
 */
export default async function handler(req, res) {
  const { url } = req.query;

  if (!url) {
    return res.status(400).json({ error: 'Please provide a URL to scrape' });
  }

  // The value is caller-controlled: a repeated `?url=` arrives as an array, and
  // schemes such as `file:` would let the browser read local resources. Accept
  // only a single, parseable http(s) URL.
  let target = null;
  if (typeof url === 'string') {
    try {
      target = new URL(url);
    } catch {
      target = null;
    }
  }
  const isWebUrl =
    target !== null &&
    (target.protocol === 'http:' || target.protocol === 'https:');
  if (!isWebUrl) {
    return res
      .status(400)
      .json({ error: 'Please provide a valid http(s) URL to scrape' });
  }

  let browser = null;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();

    // Wait for the network to go quiet before reading the page text.
    await page.goto(target.href, { waitUntil: 'networkidle2' });

    const text = await page.evaluate(() => document.body.innerText);
    res.status(200).json({ text });
  } catch (error) {
    console.error(error);
    res.status(500).json({ error: 'Failed to scrape the page' });
  } finally {
    if (browser !== null) {
      // A response has already been sent by now; a failing close must not
      // escape the handler as an unhandled rejection.
      try {
        await browser.close();
      } catch (closeError) {
        console.error(closeError);
      }
    }
  }
}
I tried puppeteer-core and chrome-aws-lambda, but neither of them works either: the request either fails outright or returns a 500 error.
Here is my code sample that works on local but fails in deployment with 404:
// pages/api/scrape.js
import puppeteer from 'puppeteer';
/**
 * Next.js API route: loads the page given by the `url` query parameter in a
 * headless browser and responds with the text of its `<body>`.
 *
 * Responses:
 *   - 400 `{ error }` when `url` is missing, is not a single string, cannot be
 *     parsed, or is not an http(s) URL.
 *   - 200 `{ text }` with `document.body.innerText` of the loaded page.
 *   - 500 `{ error }` when launching the browser or loading the page fails.
 *
 * NOTE(review): `puppeteer.launch()` needs a Chromium binary on the host.
 * Serverless platforms presumably do not ship one, which would explain why
 * this works locally but not once deployed. Confirm against the hosting
 * platform's Puppeteer guide.
 *
 * @param {object} req - Incoming request; only `req.query.url` is read.
 * @param {object} res - Response object; `status()` and `json()` are used.
 * @returns {Promise<*>} Resolves once a response has been sent.
 */
export default async function handler(req, res) {
  const { url } = req.query;

  if (!url) {
    return res.status(400).json({ error: 'Please provide a URL to scrape' });
  }

  // The value is caller-controlled: a repeated `?url=` arrives as an array, and
  // schemes such as `file:` would let the browser read local resources. Accept
  // only a single, parseable http(s) URL.
  let target = null;
  if (typeof url === 'string') {
    try {
      target = new URL(url);
    } catch {
      target = null;
    }
  }
  const isWebUrl =
    target !== null &&
    (target.protocol === 'http:' || target.protocol === 'https:');
  if (!isWebUrl) {
    return res
      .status(400)
      .json({ error: 'Please provide a valid http(s) URL to scrape' });
  }

  let browser = null;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();

    // Wait for the network to go quiet before reading the page text.
    await page.goto(target.href, { waitUntil: 'networkidle2' });

    const text = await page.evaluate(() => document.body.innerText);
    res.status(200).json({ text });
  } catch (error) {
    console.error(error);
    res.status(500).json({ error: 'Failed to scrape the page' });
  } finally {
    if (browser !== null) {
      // A response has already been sent by now; a failing close must not
      // escape the handler as an unhandled rejection.
      try {
        await browser.close();
      } catch (closeError) {
        console.error(closeError);
      }
    }
  }
}
2 Replies
That was an accidental report, oops.