import { PlaywrightCrawler } from 'crawlee';

// Number of repository cards that must be present on the page before the
// crawler stops clicking "Load more" and scrolling.
const maxRepoCount = 100;

// Crawler that expands the repository list of the visited page and prints
// the details of every repository card it finds.
const crawler = new PlaywrightCrawler({
    /**
     * Handles one page: keeps loading repository cards until at least
     * `maxRepoCount` are present, extracts one record per card and prints
     * the records to the console.
     *
     * @param {object} context - Crawling context supplied by the crawler.
     * @param {object} context.page - Playwright page for the current request.
     * @param {Function} context.infiniteScroll - Helper that scrolls the page
     *     and clicks the given button while new content keeps loading.
     * @returns {Promise<void>}
     */
    requestHandler: async ({ page, infiniteScroll }) => {
        // A single selector is shared by the counting and the extraction
        // step so they can never disagree about what a repository card is.
        const repoCardSelector = 'article.border';

        // Click the Load more button and scroll until
        // `maxRepoCount` repository cards are found.
        console.log('Clicking and scrolling.');
        await infiniteScroll({
            buttonSelector: 'text=Load more',
            stopScrollCallback: async () => {
                const repoCount = (await page.$$(repoCardSelector)).length;
                return repoCount >= maxRepoCount;
            },
        });

        // Extract data from the page. Every element matching
        // `repoCardSelector` is treated as one repository card.
        // The callback is evaluated inside the browser page, so it must be
        // self-contained: helpers live inside it and nothing from the
        // surrounding Node.js scope is referenced.
        const repos = await page.$$eval(repoCardSelector, (repoCards) => {
            // Trimmed visible text of an element, or null when it is absent.
            const toText = (element) => element?.innerText.trim() ?? null;

            // Parses numbers written with thousands separators ("12,345").
            // Missing, blank or non-numeric text yields null, not NaN or 0.
            const parseNumber = (text) => {
                if (text == null || text.trim() === '') {
                    return null;
                }
                const value = Number(text.replaceAll(',', ''));
                return Number.isNaN(value) ? null : value;
            };

            return repoCards.map((card) => {
                // The first two heading links are read as owner and repository.
                const [user, repo] = card.querySelectorAll('h3 a');
                const starCounter = card.querySelector('#repo-stars-counter-star');
                const description = card.querySelector('div.px-3 > p + div');
                const topics = card.querySelectorAll('a.topic-tag');

                // Every lookup tolerates a missing element so that a single
                // malformed card cannot abort the extraction of all others.
                return {
                    user: toText(user),
                    repo: toText(repo),
                    url: repo?.href ?? null,
                    stars: parseNumber(starCounter?.getAttribute('title')),
                    description: toText(description),
                    topics: Array.from(topics, (topic) => toText(topic)),
                };
            });
        });

        // Print the results. Nice!
        console.log(`We extracted ${repos.length} repositories.`);
        console.dir(repos);
    },
});

// Start the crawl; the listed URLs are the only initial requests.
const startUrls = ['https://github.com/topics/javascript'];
await crawler.run(startUrls);