Seoxshop

// scraper.js
import fetch from 'node-fetch';
import * as cheerio from 'cheerio';
import { URL } from 'url';

/**
 * Scrape a page and extract fields based on CSS selectors.
 *
 * Each entry in `selectors` produces one property on the result object.
 * With `multiple: true` the property is an array with one value per matched
 * element; otherwise only the first matched element is used. A value is the
 * named attribute when `attr` is set, or the trimmed text content when not.
 *
 * @param {string} pageUrl - The URL to scrape
 * @param {Object} selectors - { fieldName: { css: string, attr?: string, multiple?: boolean } }
 * @returns {Promise<Object|null>} An object holding `url` plus one property
 *   per selector field, or `null` when the response status is not OK.
 * @throws Rejections from `fetch` or from reading the body are not caught
 *   here and propagate to the caller.
 */
async function scrapePage(pageUrl, selectors) {
  console.log(`Fetching: ${pageUrl}`);

  const res = await fetch(pageUrl, {
    headers: { 'User-Agent': 'ParserBotJS/1.0 (+https://example.com/bot)' },
  });

  // A non-2xx status is reported and signalled with null rather than thrown.
  if (!res.ok) {
    console.error(`Failed to fetch ${pageUrl}: ${res.status}`);
    return null;
  }

  const html = await res.text();
  const $ = cheerio.load(html);
  const result = { url: pageUrl };

  for (const [field, sel] of Object.entries(selectors)) {
    const elements = $(sel.css);

    if (sel.multiple) {
      // .get() converts the cheerio collection produced by .map() into a plain array.
      result[field] = elements
        .map((_, el) => (sel.attr ? $(el).attr(sel.attr) : $(el).text().trim()))
        .get();
    } else {
      const el = elements.first();
      // NOTE(review): when nothing matches, the attr branch presumably yields
      // undefined (dropped by JSON.stringify) while the text branch yields ''
      // - confirm against the cheerio docs if callers rely on the key existing.
      result[field] = sel.attr ? el.attr(sel.attr) : el.text().trim();
    }
  }

  return result;
}

// Example usage:
(async () => {
  const selectors = {
    title: { css: 'h1' },
    links: { css: 'a', attr: 'href', multiple: true },
  };

  const data = await scrapePage('https://example.org', selectors);
  console.log(JSON.stringify(data, null, 2));
})().catch((err) => {
  // scrapePage rejects on fetch failures; report them instead of leaving
  // the promise floating, and signal failure through the exit code.
  console.error(err);
  process.exitCode = 1;
});

Menu

0 Comments

Menu

Popular Posts

Projects

Subscribe Us

Report Abuse

About Me

Search This Blog

Blog Archive

Breaking

Recent In Internet

Facebook

Comments

Recent

Subscribe Us

Contact form

Menu

You may like these posts

0 Comments

Menu

Popular Posts

Projects

Subscribe Us

Report Abuse

About Me

Search This Blog

Blog Archive

Breaking

Recent In Internet

Facebook

Comments

Recent

Subscribe Us

Contact form