Skip to content

Instantly share code, notes, and snippets.

@JDWardle
Last active August 29, 2015 14:15
Show Gist options
  • Select an option

  • Save JDWardle/72048a39be88b184c183 to your computer and use it in GitHub Desktop.

Select an option

Save JDWardle/72048a39be88b184c183 to your computer and use it in GitHub Desktop.
Binding of Isaac Wiki Scraper
// Entry point: scrape every `.mw-collapsible` table in the current document and
// log the resulting array. Calling scrapeHtml() ahead of its definition works
// because function declarations are hoisted.
console.log(scrapeHtml());
/**
 * Removes the "[collapse]" marker (presumably the toggle label of collapsible
 * wiki tables - confirm against the page) from a piece of scraped text.
 *
 * When the marker is present, the text that follows the first marker (up to
 * the next marker, if any) is returned, trimmed and with its original casing.
 * When the marker is absent the input is returned untouched.
 *
 * @param {string} text - Raw text taken from the page.
 * @returns {string} The text without the marker.
 */
function cleanText (text) {
    // Split case-insensitively so the marker is found however it is
    // capitalised, without lower-casing the text that is kept.
    var parts = text.split(/\[collapse\]/i);

    if (parts.length < 2) {
        return text;
    }

    return parts[1].trim();
}
/**
 * Finds the title of a table by walking backwards through the element's
 * previous siblings until a heading element (h1-h6) is reached.
 *
 * The title is the text of the heading's `.mw-headline` child when there is
 * one, otherwise the heading's own text; either way it is trimmed and passed
 * through cleanText().
 *
 * @param {Element} element - Element (normally the table) whose title is wanted.
 * @returns {string} The cleaned title, or an empty string when no heading
 *     precedes the element.
 */
function getTableTitle (element) {
    var headingTag = /^h[1-6]$/i,
        node = element.previousElementSibling,
        headline;

    // Match real heading tags only: testing for the letter "h" anywhere in the
    // tag name would also stop on <hr>, <th>, <header>, ...
    while (node && !headingTag.test(node.tagName)) {
        node = node.previousElementSibling;
    }

    // Ran out of siblings without meeting a heading.
    if (!node) {
        return '';
    }

    headline = node.querySelector('.mw-headline') || node;

    return cleanText(headline.innerText.trim());
}
/**
 * Converts a table element into a plain object holding one record per row.
 *
 * Each record maps a column name to the cell value: the `src` of the first
 * image in the cell when there is one, otherwise the cell's trimmed text
 * passed through cleanText().
 *
 * @param {Element} table - Table to read; its title comes from getTableTitle().
 * @param {string[]} headers - Column names by position, used as record keys.
 * @returns {{rows: Object[], name: string}} The table title and its records.
 */
function retrieveData (table, headers) {
    var columnNames = headers || [],
        rowElements = table.querySelectorAll('tr'),
        tableData = {
            rows: []
        };

    tableData.name = getTableTitle(table);

    for (var r = 0; r < rowElements.length; r++) {
        var cells = rowElements[r].querySelectorAll('td'),
            record = {};

        for (var c = 0; c < cells.length; c++) {
            // A cell without a matching header gets a positional key; otherwise
            // every such cell would land under the single key "undefined" and
            // overwrite the one before it.
            var key = columnNames[c] === undefined ? 'column' + c : columnNames[c],
                image = cells[c].querySelector('img');

            // Image cells are represented by the image URL; their text is ignored.
            record[key] = image ? image.src : cleanText(cells[c].innerText.trim());
        }

        // Rows with no <td> cells (e.g. a header row made only of <th>) yield no record.
        if (cells.length > 0) {
            tableData.rows.push(record);
        }
    }

    return tableData;
}
/**
 * Reads the column names of a table.
 *
 * Uses the <th> cells matched by `tr:nth-child(1) > th`; when there are none,
 * falls back to the <td> cells matched by `tr:nth-child(1) > td`.
 *
 * @param {Element} table - Table whose header cells are read.
 * @returns {string[]} Trimmed, lower-cased header texts passed through
 *     cleanText(), in document order.
 */
function getTableHeaders (table) {
    var headerCells = table.querySelectorAll('tr:nth-child(1) > th'),
        tableHeaders = [];

    if (!headerCells.length) {
        headerCells = table.querySelectorAll('tr:nth-child(1) > td');
    }

    for (var i = 0; i < headerCells.length; i++) {
        // Trim here: cleanText() only trims when it finds the "[collapse]"
        // marker, and stray whitespace would otherwise end up in the keys
        // that the row records are built with.
        tableHeaders.push(cleanText(headerCells[i].innerText.trim().toLowerCase()));
    }

    return tableHeaders;
}
/**
 * Scrapes every element matching `.mw-collapsible` in the current document.
 *
 * For each match the headers are read with getTableHeaders() and the rows are
 * then extracted with retrieveData().
 *
 * @returns {Object[]} One retrieveData() result per matched element, in
 *     document order.
 */
function scrapeHtml () {
    var collapsibleTables = document.querySelectorAll('.mw-collapsible');

    // querySelectorAll returns an array-like list, so borrow Array's map.
    return Array.prototype.map.call(collapsibleTables, function (table) {
        var columnNames = getTableHeaders(table);

        return retrieveData(table, columnNames);
    });
}
/**
 * Removes the "[collapse]" marker from scraped text. When the marker is
 * present, the trimmed text following the first marker (up to the next one)
 * is returned with its original casing; otherwise the input is returned as is.
 *
 * @param {string} text - Raw text taken from the page.
 * @returns {string} The text without the marker.
 */
function cleanText(text) {
    // Case-insensitive split: finds the marker without lower-casing the result.
    var parts = text.split(/\[collapse\]/i);

    return parts.length < 2 ? text : parts[1].trim();
}

/**
 * Finds the title of a table from the nearest preceding sibling heading (h1-h6).
 *
 * @param {Element} element - Element whose title is wanted.
 * @returns {string} The cleaned heading text, or '' when no heading precedes it.
 */
function getTableTitle(element) {
    var node = element.previousElementSibling,
        headline;

    // Only real heading tags count; a test for the letter "h" also hits <hr>, <th>, ...
    while (node && !/^h[1-6]$/i.test(node.tagName)) {
        node = node.previousElementSibling;
    }

    if (!node) {
        return '';
    }

    // Fall back to the heading itself when it has no .mw-headline child.
    headline = node.querySelector('.mw-headline') || node;

    return cleanText(headline.innerText.trim());
}

/**
 * Converts a table element into `{ rows, name }`, one record per row that has
 * <td> cells. A cell's value is its first image's `src`, or else its cleaned text.
 *
 * @param {Element} table - Table to read.
 * @param {string[]} headers - Column names by position, used as record keys.
 * @returns {{rows: Object[], name: string}} The table title and its records.
 */
function retrieveData(table, headers) {
    var columnNames = headers || [],
        rowElements = table.querySelectorAll('tr'),
        tableData = { rows: [] };

    tableData.name = getTableTitle(table);

    for (var r = 0; r < rowElements.length; r++) {
        var cells = rowElements[r].querySelectorAll('td'),
            record = {};

        for (var c = 0; c < cells.length; c++) {
            // Positional key for cells without a header, so they do not all
            // overwrite each other under the key "undefined".
            var key = columnNames[c] === undefined ? 'column' + c : columnNames[c],
                image = cells[c].querySelector('img');

            record[key] = image ? image.src : cleanText(cells[c].innerText.trim());
        }

        // Rows without <td> cells yield no record.
        if (cells.length > 0) {
            tableData.rows.push(record);
        }
    }

    return tableData;
}

/**
 * Reads the column names of a table from `tr:nth-child(1) > th`, falling back
 * to `tr:nth-child(1) > td` when no <th> cells match.
 *
 * @param {Element} table - Table whose header cells are read.
 * @returns {string[]} Trimmed, lower-cased, cleaned header texts.
 */
function getTableHeaders(table) {
    var headerCells = table.querySelectorAll('tr:nth-child(1) > th'),
        tableHeaders = [];

    if (!headerCells.length) {
        headerCells = table.querySelectorAll('tr:nth-child(1) > td');
    }

    for (var i = 0; i < headerCells.length; i++) {
        // Trim so stray whitespace does not end up in the record keys.
        tableHeaders.push(cleanText(headerCells[i].innerText.trim().toLowerCase()));
    }

    return tableHeaders;
}

/**
 * Scrapes every `.mw-collapsible` element in the current document.
 *
 * @returns {Object[]} One retrieveData() result per matched element.
 */
function scrapeHtml() {
    var tables = document.querySelectorAll('.mw-collapsible'),
        data = [];

    for (var i = 0; i < tables.length; i++) {
        data.push(retrieveData(tables[i], getTableHeaders(tables[i])));
    }

    return data;
}

// Entry point: run the scraper against the current page and log the result.
console.log(scrapeHtml());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment