import { PlaywrightCrawler } from 'crawlee';

// Scrolling/clicking stops once at least this many repository cards are on the page.
const maxRepoCount = 100;

/**
 * Crawler that opens a GitHub topic page, keeps clicking "Load more" and
 * scrolling until `maxRepoCount` repository cards are present, then extracts
 * one plain record per card and prints the result to the console.
 */
const crawler = new PlaywrightCrawler({
    requestHandler: async ({ page, infiniteScroll }) => {
        // Click the Load more button and scroll until
        // `maxRepoCount` repositories are found.
        console.log('Clicking and scrolling.');
        await infiniteScroll({
            buttonSelector: 'text=Load more',
            stopScrollCallback: async () => {
                const repoCards = await page.$$('article.border');
                return repoCards.length >= maxRepoCount;
            },
        });

        // Extract data from the page. Selecting all 'article' elements
        // will return all the repository cards we're looking for.
        // NOTE: this callback is evaluated inside the browser page, so it
        // cannot reference anything from the outer Node.js scope; all helpers
        // it needs are therefore defined inside it.
        const repos = await page.$$eval('article.border', (repoCards) => {
            // Trimmed visible text of an element, or null when the element is missing.
            const toText = (element) => element?.innerText.trim() ?? null;
            // "12,345" -> 12345; null when there is no text to parse.
            const parseNumber = (text) => (text == null ? null : Number(text.replaceAll(',', '')));

            return repoCards.map((card) => {
                const [user, repo] = card.querySelectorAll('h3 a');
                // The star counter (or its title attribute) may be absent on
                // some cards; treat that as "unknown" instead of throwing and
                // losing every other card on the page.
                const stars = card.querySelector('#repo-stars-counter-star')?.getAttribute('title') ?? null;
                const description = card.querySelector('div.px-3 > p + div');
                const topics = card.querySelectorAll('a.topic-tag');

                return {
                    user: toText(user),
                    repo: toText(repo),
                    // Guarded like `toText(repo)`: a card without a repo link yields null.
                    url: repo?.href ?? null,
                    stars: parseNumber(stars),
                    description: toText(description),
                    topics: Array.from(topics, (topic) => toText(topic)),
                };
            });
        });

        // Print the results. Nice!
        console.log(`We extracted ${repos.length} repositories.`);
        console.dir(repos);
    },
});

await crawler.run(['https://github.com/topics/javascript']);