Last active
August 29, 2015 14:15
-
-
Save JDWardle/72048a39be88b184c183 to your computer and use it in GitHub Desktop.
Binding of Isaac Wiki Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Entry point: scrape the current document and print the collected table data
// to the console. The function declarations below are hoisted, so calling
// scrapeHtml() before its definition is fine.
console.log(scrapeHtml());
/**
 * Removes the "[collapse]" marker from a piece of scraped text.
 * (Presumably the marker is the label of a collapsible-section toggle that
 * leaks into innerText; that is an assumption, not shown in this file.)
 *
 * When the marker is present (matched case-insensitively), the result is the
 * trimmed text between the first marker and the next one (or the end of the
 * string). Text without the marker is returned as-is, without trimming.
 *
 * @param {string} text - Raw text, e.g. an element's innerText.
 * @returns {string} The cleaned text; '' when text is null or undefined.
 */
function cleanText (text) {
  if (text == null) {
    return '';
  }

  var source = String(text);
  // Split case-insensitively rather than lower-casing the whole string, so
  // the text that survives keeps its original capitalisation.
  var parts = source.split(/\[collapse\]/i);

  if (parts.length < 2) {
    // No marker found: hand the text back untouched.
    return source;
  }

  return parts[1].trim();
}
/**
 * Finds the title for a table by walking backwards through the element's
 * preceding siblings until a heading element (h1-h6) is reached.
 *
 * The title is taken from the '.mw-headline' element inside that heading, or
 * from the heading itself when no such element exists, and is passed through
 * cleanText after trimming.
 *
 * @param {Element} element - The element (typically a table) to find a title for.
 * @returns {string} The cleaned heading text, or '' when no heading precedes
 *   the element.
 */
function getTableTitle (element) {
  // Match real heading tags only; a plain "contains 'h'" test would also
  // accept tags such as <hr> or <th>.
  var headingTag = /^h[1-6]$/i;
  var candidate = element ? element.previousElementSibling : null;

  while (candidate && !headingTag.test(candidate.tagName)) {
    candidate = candidate.previousElementSibling;
  }

  if (!candidate) {
    // Ran out of siblings without finding a heading.
    return '';
  }

  var headline = candidate.querySelector('.mw-headline') || candidate;
  return cleanText((headline.innerText || '').trim());
}
/**
 * Collects the data rows of a table into plain objects keyed by column header.
 *
 * For every <tr> inside the table, each <td> contributes one property: the
 * `src` of the first <img> in the cell when there is one, otherwise the
 * cell's trimmed innerText passed through cleanText. Rows without any <td>
 * (for example a header row made of <th> cells) are skipped.
 *
 * NOTE(review): when the header row itself is built from <td> cells it will
 * also show up as the first data row; confirm whether callers rely on that.
 *
 * @param {Element} table - The table element to read.
 * @param {string[]} [headers] - Column names, by column index. Cells that have
 *   no matching header are stored under 'column' + index.
 * @returns {{rows: Object[], name: string}} The rows plus the table title as
 *   returned by getTableTitle.
 */
function retrieveData (table, headers) {
  var columnNames = headers || [];
  var rows = table.querySelectorAll('tr');
  var tableData = {
    rows: []
  };

  tableData.name = getTableTitle(table);

  for (var r = 0; r < rows.length; r++) {
    var cells = rows[r].querySelectorAll('td');
    var row = {};

    for (var c = 0; c < cells.length; c++) {
      // Without this fallback every extra cell would be written to the key
      // "undefined" and overwrite the previous one.
      var key = c < columnNames.length ? columnNames[c] : 'column' + c;
      var img = cells[c].querySelector('img');

      if (img) {
        row[key] = img.src;
      } else {
        row[key] = cleanText(cells[c].innerText.trim());
      }
    }

    if (Object.keys(row).length !== 0) {
      tableData.rows.push(row);
    }
  }

  return tableData;
}
/**
 * Reads the column names from the first row of a table.
 *
 * Only the direct cells of the first <tr> (in document order) are considered.
 * If that row has any <th> cells they are used; otherwise its <td> cells are
 * used. Each name is lower-cased, trimmed and passed through cleanText.
 *
 * @param {Element} table - The table element to read.
 * @returns {string[]} Column names in column order; empty when the table has
 *   no rows.
 */
function getTableHeaders (table) {
  var tableHeaders = [];
  // Take the first row only. A 'tr:nth-child(1)' selector would also match
  // the first row of every other row group and of nested tables.
  var firstRow = table.querySelector('tr');

  if (!firstRow) {
    return tableHeaders;
  }

  var cells = firstRow.children;
  var headerCells = [];
  var dataCells = [];

  for (var i = 0; i < cells.length; i++) {
    var tag = cells[i].tagName.toLowerCase();
    if (tag === 'th') {
      headerCells.push(cells[i]);
    } else if (tag === 'td') {
      dataCells.push(cells[i]);
    }
  }

  var chosen = headerCells.length ? headerCells : dataCells;

  for (var h = 0; h < chosen.length; h++) {
    // Trim so header keys do not carry stray whitespace, matching how cell
    // values are cleaned.
    tableHeaders.push(cleanText(chosen[h].innerText.toLowerCase().trim()));
  }

  return tableHeaders;
}
/**
 * Scrapes every element with the 'mw-collapsible' class in the current
 * document.
 *
 * For each match, the column headers are read with getTableHeaders and the
 * element is then converted with retrieveData.
 *
 * @returns {Object[]} One retrieveData result per matched element, in
 *   document order.
 */
function scrapeHtml () {
  var collapsibles = document.querySelectorAll('.mw-collapsible');

  return Array.prototype.map.call(collapsibles, function (table) {
    var columnNames = getTableHeaders(table);
    return retrieveData(table, columnNames);
  });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Removes the "[collapse]" marker (matched case-insensitively) from scraped
 * text. With a marker present, returns the trimmed text between the first
 * marker and the next one (or the end); otherwise returns the text unchanged.
 *
 * @param {string} text - Raw text, e.g. an element's innerText.
 * @returns {string} The cleaned text; '' when text is null or undefined.
 */
function cleanText (text) {
  if (text == null) {
    return '';
  }
  var source = String(text);
  // Case-insensitive split keeps the original capitalisation of the result.
  var parts = source.split(/\[collapse\]/i);
  return parts.length < 2 ? source : parts[1].trim();
}

/**
 * Walks backwards through an element's preceding siblings to the nearest
 * heading (h1-h6) and returns its cleaned text, preferring the heading's
 * '.mw-headline' child when present.
 *
 * @param {Element} element - The element to find a title for.
 * @returns {string} The heading text, or '' when no heading precedes it.
 */
function getTableTitle (element) {
  // Real heading tags only; tags that merely contain "h" (hr, th) are skipped.
  var headingTag = /^h[1-6]$/i;
  var candidate = element ? element.previousElementSibling : null;
  while (candidate && !headingTag.test(candidate.tagName)) {
    candidate = candidate.previousElementSibling;
  }
  if (!candidate) {
    return '';
  }
  var headline = candidate.querySelector('.mw-headline') || candidate;
  return cleanText((headline.innerText || '').trim());
}

/**
 * Converts the <td> cells of every row in a table into objects keyed by
 * column header. A cell containing an <img> yields the image `src`; any other
 * cell yields its trimmed, cleaned innerText. Rows without <td> cells are
 * skipped.
 *
 * @param {Element} table - The table element to read.
 * @param {string[]} [headers] - Column names by index; cells without a
 *   matching header are stored under 'column' + index.
 * @returns {{rows: Object[], name: string}} The rows plus the table title.
 */
function retrieveData (table, headers) {
  var columnNames = headers || [];
  var rows = table.querySelectorAll('tr');
  var tableData = { rows: [] };
  tableData.name = getTableTitle(table);
  for (var r = 0; r < rows.length; r++) {
    var cells = rows[r].querySelectorAll('td');
    var row = {};
    for (var c = 0; c < cells.length; c++) {
      // Positional fallback avoids piling extra cells onto the key "undefined".
      var key = c < columnNames.length ? columnNames[c] : 'column' + c;
      var img = cells[c].querySelector('img');
      row[key] = img ? img.src : cleanText(cells[c].innerText.trim());
    }
    if (Object.keys(row).length !== 0) {
      tableData.rows.push(row);
    }
  }
  return tableData;
}

/**
 * Reads lower-cased, trimmed column names from the direct cells of a table's
 * first row, using its <th> cells when there are any and its <td> cells
 * otherwise.
 *
 * @param {Element} table - The table element to read.
 * @returns {string[]} Column names in column order; empty when there are no rows.
 */
function getTableHeaders (table) {
  var tableHeaders = [];
  // First row only; 'tr:nth-child(1)' would match one row per row group.
  var firstRow = table.querySelector('tr');
  if (!firstRow) {
    return tableHeaders;
  }
  var cells = firstRow.children;
  var headerCells = [];
  var dataCells = [];
  for (var i = 0; i < cells.length; i++) {
    var tag = cells[i].tagName.toLowerCase();
    if (tag === 'th') {
      headerCells.push(cells[i]);
    } else if (tag === 'td') {
      dataCells.push(cells[i]);
    }
  }
  var chosen = headerCells.length ? headerCells : dataCells;
  for (var h = 0; h < chosen.length; h++) {
    tableHeaders.push(cleanText(chosen[h].innerText.toLowerCase().trim()));
  }
  return tableHeaders;
}

/**
 * Scrapes every '.mw-collapsible' element in the current document.
 *
 * @returns {Object[]} One retrieveData result per matched element.
 */
function scrapeHtml () {
  var tables = document.querySelectorAll('.mw-collapsible');
  var data = [];
  for (var i = 0; i < tables.length; i++) {
    var headers = getTableHeaders(tables[i]);
    data.push(retrieveData(tables[i], headers));
  }
  return data;
}

// Entry point: scrape the current page and print the result.
console.log(scrapeHtml());
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment