/*
 * Inspired by https://gist.github.com/shaneapen/3406477b9f946855d02e3f33ec121975
 *
 * The script scrapes the members of a WhatsApp group chat and exports the data to a CSV file.
 * It scrolls automatically and extracts each list item in the members list with all the
 * information available. Then it joins this information with the indexedDB data to get the
 * groups the member is in and, if the contact is already saved in the phone, the contact's info.
 *
 * Steps:
 * 1. Open WhatsApp Web
 * 2. Open the group chat you want to scrape -> Click on the group name to open the group info
 *    -> Click on the members list
 * 3. Open the browser console (F12)
 * 4. Copy and paste the code below into the console and press Enter
 *
 * After the script has finished running, a download link will appear for the CSV file
 * containing the scraped data.
 */

// Milliseconds between two auto-scroll steps.
const SCROLL_INTERVAL = 1000;
// Pixels added to scrollTop on every auto-scroll step.
const SCROLL_INCREMENT = 450;
// When true, the members list is scrolled automatically until its end is reached.
const AUTO_SCROLL = true;
// When true, scraped rows are enriched with data read from IndexedDB.
const CHECK_INDEXEDDB = true;

// Shared scrape state; assigned by start() and read by the other functions.
let scrollInterval;
let observer;
let membersList;
let header;
let MEMBERS_QUEUE;

/**
 * Thin promise-based wrapper around the IndexedDB database named "model-storage".
 * Store and index names are the ones this script queries; their schema is not visible
 * here (presumably WhatsApp Web's local model storage - confirm against the live page).
 */
class WhatsappDB {
    #db;
    #dbName = "model-storage";
    #groupsCollection = "group-metadata";
    #contactsCollection = "contact";
    #phoneNumberIndex = "phoneNumber";
    #participantsCollection = "participant";
    #participantsIndex = "participants";

    /**
     * Opens the database on first use and caches the connection.
     * @returns {Promise<IDBDatabase>} the open database connection
     */
    async openConnection() {
        if (!this.#db) {
            const dbName = this.#dbName;
            this.#db = await new Promise((resolve, reject) => {
                const request = indexedDB.open(dbName);
                request.onerror = () => {
                    reject(request.error);
                };
                request.onsuccess = () => {
                    resolve(request.result);
                };
            });
        }
        return this.#db;
    }

    /**
     * Runs a read-only getAll() against an object store, or against one of its indexes.
     * @param {string} collection - object store name
     * @param {string} [index] - index name; when omitted the store itself is queried
     * @param {*} [query] - key or key range forwarded to getAll()
     * @param {number} [count] - maximum number of records forwarded to getAll()
     * @returns {Promise<Array>} the matching records
     */
    async #promisifyCol(collection, index, query, count) {
        const db = await this.openConnection();
        return new Promise((resolve, reject) => {
            const transaction = db.transaction(collection, "readonly");
            const objectStore = transaction.objectStore(collection);
            const source = index ? objectStore.index(index) : objectStore;
            const request = source.getAll(query, count);
            request.onerror = () => {
                reject(request.error);
            };
            request.onsuccess = () => {
                resolve(request.result);
            };
        });
    }

    /** @returns {Promise<Array>} every record of the "group-metadata" store */
    async getGroups() {
        return this.#promisifyCol(this.#groupsCollection);
    }

    /**
     * @param {string} [key] - key looked up in the "participants" index
     * @returns {Promise<Array>} matching records of the "participant" store
     */
    async getParticipants(key) {
        return this.#promisifyCol(this.#participantsCollection, this.#participantsIndex, key);
    }

    /**
     * @param {string} [key] - key looked up in the "phoneNumber" index
     * @returns {Promise<Array>} matching records of the "contact" store
     */
    async getContacts(key) {
        return this.#promisifyCol(this.#contactsCollection, this.#phoneNumberIndex, key);
    }

    /**
     * Builds the lookup key used for the participants index from a phone number.
     * @param {string} phone - phone number, optionally with a leading "+"
     * @returns {string} the phone without its first "+" followed by "@c.us"
     */
    phoneToKey(phone) {
        return `${phone.replace('+', '')}@c.us`;
    }
}

const whatsappDB = new WhatsappDB();

// Filled by start() when CHECK_INDEXEDDB is enabled.
let groups;
let contacts;

// Resolved into a local binding instead of overwriting the built-in global.
const MutationObserverImpl = globalThis.MutationObserver || globalThis.WebKitMutationObserver;

/**
 * Returns the element that is scrolled while scraping: the second sibling after the header.
 */
function getScrollContainer() {
    return header.nextSibling.nextSibling;
}

/**
 * One auto-scroll step: scrolls the list down by SCROLL_INCREMENT, or stops the scrape
 * once the end of the list has been reached.
 */
const autoScroll = function () {
    const container = getScrollContainer();
    if (scrollEndReached(container)) {
        stop();
        return;
    }
    container.scrollTop += SCROLL_INCREMENT;
};

/**
 * Starts a scrape: loads the IndexedDB data (if enabled), watches the members list for
 * DOM changes, performs a first scrape pass and schedules auto-scrolling (if enabled).
 * @returns {Promise<void>} resolves after the first pass; the CSV is produced by stop()
 * @throws {Error} when the page contains no header element
 */
async function start() {
    MEMBERS_QUEUE = {};

    if (CHECK_INDEXEDDB) {
        groups = await whatsappDB.getGroups();
        contacts = await whatsappDB.getContacts();
    }

    header = document.getElementsByTagName('header')[0];
    if (!header) {
        throw new Error("No header element found. Open the group's members list before running the script.");
    }
    membersList = header.parentNode;

    // Re-scrape whenever the list's DOM changes (fired when a mutation occurs).
    observer = new MutationObserverImpl(() => {
        scrapeData().catch((err) => {
            console.error("Scrape pass failed:", err);
        });
    });

    // the div to watch for mutations
    observer.observe(membersList, {
        childList: true,
        subtree: true,
    });

    // scroll to top before beginning
    getScrollContainer().scrollTop = 0;
    await scrapeData();

    if (AUTO_SCROLL) {
        scrollInterval = setInterval(autoScroll, SCROLL_INTERVAL);
    }
}

/**
 * Stops the current scrape instance, prints the collected members and triggers the
 * CSV download.
 */
const stop = function () {
    window.clearInterval(scrollInterval);
    // observer is only set once start() has progressed far enough.
    observer?.disconnect();
    console.table(MEMBERS_QUEUE);
    console.log(`Scrape stopped. ${Object.keys(MEMBERS_QUEUE).length} members scraped.`);
    createDownloadLink(convertToCSV(Object.values(MEMBERS_QUEUE)), "whatsapp_members.csv");
};

/**
 * Extracts every member row currently present in the members list and stores it in
 * MEMBERS_QUEUE, keyed by phone when available and by name otherwise. The row whose
 * name is "You" is skipped.
 */
async function scrapeData() {
    const members = membersList.querySelectorAll('[role=listitem] > [role=button]');
    for (const member of members) {
        const details = await handleMember(member);
        if (details.name === "You") {
            continue;
        }
        const key = details.phone ? details.phone : details.name;
        MEMBERS_QUEUE[key] = details;
    }
}

/**
 * Builds the exported record for one member row.
 * @param {Element} member - the row element of the member
 * @returns {Promise<{phone: ?string, name: ?string, status: ?string, image: ?string,
 *     groups: string, isSaved: string}>} groups and isSaved are JSON-encoded strings
 */
async function handleMember(member) {
    const title = getTitle(member);
    const phoneCaption = getPhone(member);
    const status = getStatus(member);
    const image = getImage(member);
    let memberGroups = [];
    let isSaved = false;

    // If contact unsaved - the phone is the caption or the title.
    // If contact saved - the phone is unavailable.
    // title may be null when the row has no span[title], hence the optional call.
    let phone = phoneCaption || (title?.startsWith("+") ? title : null);
    const name = (phoneCaption || !phone) ? title : null;

    if (name && !phone) {
        const contact = await getContact(name);
        phone = contact?.phoneNumber ? `+${contact.phoneNumber.split('@')[0]}` : null;
        isSaved = Boolean(contact);
    }

    if (phone) {
        // Strip whitespace, dashes and parentheses so the phone can be used as a key.
        phone = phone.replace(/[\s\-()]/g, '');
        memberGroups = await getGroups(phone);
    }

    return {
        phone: phone,
        name: name,
        status: status,
        image: image,
        groups: JSON.stringify(memberGroups),
        isSaved: JSON.stringify(isSaved),
    };
}

/**
 * @param {Element} member - the row element of the member
 * @returns {?string} the row's first image converted by imageToDataURL, or null without image
 */
function getImage(member) {
    const img = member.querySelector('img');
    if (!img) {
        return null;
    }
    return imageToDataURL(img);
}

/**
 * @param {Element} member - the row element of the member
 * @returns {?string} the title attribute of the row's ".copyable-text" element, or null
 */
function getStatus(member) {
    const status = member.querySelector('.copyable-text');
    return status ? status.title : null;
}

/**
 * @param {Element} member - the row element of the member
 * @returns {?string} text of the row's caption span (empty aria-label, no title), or null
 */
function getPhone(member) {
    const phone = member.querySelector('span[aria-label=""]:not(span[title])');
    // textContent yields the plain text; innerHTML would return entity-escaped markup.
    return phone ? phone.textContent : null;
}

/**
 * @param {Element} member - the row element of the member
 * @returns {?string} the title attribute of the row's first span[title], or null
 */
function getTitle(member) {
    const title = member.querySelector('span[title]');
    return title ? title.title : null;
}

/**
 * Looks up the names of the groups a phone number participates in.
 * @param {string} phone - normalized phone number
 * @returns {Promise<Array<?string>>} group subjects (null for unknown group ids);
 *     an empty array when CHECK_INDEXEDDB is disabled
 */
async function getGroups(phone) {
    if (!CHECK_INDEXEDDB) {
        return [];
    }
    const key = whatsappDB.phoneToKey(phone);
    const participants = await whatsappDB.getParticipants(key);
    return participants.map((participant) => getGroupName(participant.groupId));
}

/**
 * @param {*} groupID - id compared against the id of each loaded group
 * @returns {?string} the subject of the matching group, or null when none matches
 */
function getGroupName(groupID) {
    const group = groups.find((candidate) => candidate.id === groupID);
    return group ? group.subject : null;
}

/**
 * @param {string} name - contact name to look up among the loaded contacts
 * @returns {?Object} the first contact with that name; null/undefined when there is none
 *     or when CHECK_INDEXEDDB is disabled
 */
function getContact(name) {
    if (!CHECK_INDEXEDDB) {
        return null;
    }
    return contacts.find((contact) => contact.name === name);
}

/**
 * Helper functions
 * @References [1] https://stackoverflow.com/questions/53158796/get-scroll-position-with-reactjs/53158893#53158893
 */

/**
 * Tells whether an element is scrolled to its bottom.
 * @param {{scrollHeight: number, clientHeight: number, scrollTop: number}} el
 * @returns {boolean} true when at most one pixel of content remains below the viewport
 */
function scrollEndReached(el) {
    // scrollTop may be fractional while scrollHeight/clientHeight are rounded, so an
    // exact comparison with 0 can miss the end of the list; allow a 1px tolerance.
    return Math.abs(el.scrollHeight - el.clientHeight - el.scrollTop) <= 1;
}

/**
 * Converts an image element to a PNG data URI by drawing it onto a canvas.
 * @param {HTMLImageElement} img - the image to convert
 * @returns {?string} the data URI; when the conversion throws, the image's src
 *     (or null when it has none) so that one image cannot abort the whole scrape pass
 */
function imageToDataURL(img) {
    try {
        // NOTE(review): kept from the original; presumably meant to avoid tainting the
        // canvas with cross-origin images - confirm it has the intended effect.
        img.crossOrigin = "anonymous";

        // Create a canvas element
        const canvas = document.createElement('canvas');
        canvas.width = img.naturalWidth || img.width;
        canvas.height = img.naturalHeight || img.height;

        // Draw the image onto the canvas
        const ctx = canvas.getContext('2d');
        ctx.drawImage(img, 0, 0);

        // Get the Data URI of the image
        return canvas.toDataURL('image/png');
    } catch (err) {
        // Deliberate best-effort: keep the image URL instead of failing the member row.
        return img.src || null;
    }
}

/**
 * Triggers a browser download of the given text.
 * @param {string} data - file content
 * @param {string} fileName - suggested name of the downloaded file
 */
function createDownloadLink(data, fileName) {
    const a = document.createElement('a');
    a.style.display = "none";

    const blob = new Blob([data], { type: "text/csv;charset=utf-8" });
    const url = window.URL.createObjectURL(blob);
    a.setAttribute("href", url);
    a.setAttribute("download", fileName);
    document.body.append(a);

    a.click();
    window.URL.revokeObjectURL(url);
    a.remove();
}

// https://stackoverflow.com/questions/11257062/converting-json-object-to-csv-format-in-javascript
/**
 * Serializes an array of flat objects to CSV. The header row is taken from the keys of
 * the first object; every non-null value is wrapped in double quotes with embedded
 * double quotes doubled, and null/undefined values become empty fields.
 * @param {Array<Object>} arr - the rows to serialize
 * @returns {string} the CSV text; an empty string when there are no rows
 */
function convertToCSV(arr) {
    if (!Array.isArray(arr) || arr.length === 0) {
        return '';
    }

    const escapeCell = (value) => {
        if (value == null) {
            return '';
        }
        return `"${String(value).replaceAll('"', '""')}"`;
    };

    const rows = [Object.keys(arr[0]), ...arr.map((item) => Object.values(item))];
    return rows.map((row) => row.map(escapeCell).join(',')).join('\n');
}

// start() resolves after the first scrape pass; failures are reported instead of being
// left as an unhandled rejection.
start()
    .then(() => {
        console.log("Finished scraping.");
    })
    .catch((err) => {
        console.error("Scraping failed:", err);
    });