// Bookmarklet to calculate the most generic xpath for the current selection
// (helper utility for scraping websites)

/**
 * Computes a short XPath for the element enclosing the current text
 * selection, highlights that element with a dashed red border and shows the
 * expression in a prompt() so it can be copied. The original border is
 * restored once the prompt is dismissed. If there is no selection, or the
 * computed expression does not resolve to an element, an alert is shown
 * instead.
 */
function selectionxpath() {
    /**
     * Evaluates an XPath expression and returns its first match in document
     * order, or null when nothing matches.
     */
    function firstMatch(expr, contextNode) {
        return document.evaluate(expr, contextNode, null,
            XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
    }

    /**
     * Quotes a string as an XPath 1.0 literal. XPath 1.0 has no escape
     * character, so a value containing both quote kinds is assembled with
     * concat().
     */
    function xpathLiteral(value) {
        if (value.indexOf("'") === -1) {
            return "'" + value + "'";
        }
        if (value.indexOf('"') === -1) {
            return '"' + value + '"';
        }
        return "concat('" + value.split("'").join("', \"'\", '") + "')";
    }

    /**
     * Walks from `sel` up through its ancestors, stopping before `stop`
     * (or at the top of the tree when `stop` is null), and returns
     * { node, step } for the first element for which `stepFor(node)` yields
     * a step string. Returns null when no element qualifies.
     */
    function findAnchor(sel, stop, stepFor) {
        for (var node = sel; node && node !== stop; node = node.parentNode) {
            if (node.nodeType !== 1) {
                continue;
            }
            var step = stepFor(node);
            if (step !== null) {
                return { node: node, step: step };
            }
        }
        return null;
    }

    /**
     * Builds the positional path ("/div[2]/span") from `stop` (exclusive)
     * down to `sel` (inclusive). An index is only emitted when the element
     * is not the first sibling with its tag name.
     */
    function absolutePath(sel, stop) {
        var path = "";
        for (var node = sel; node && node.nodeType === 1 && node !== stop; node = node.parentNode) {
            var idx = 1;
            for (var sib = node.previousSibling; sib; sib = sib.previousSibling) {
                if (sib.nodeType === 1 && sib.tagName === node.tagName) {
                    idx++;
                }
            }
            var name = node.tagName.toLowerCase();
            if (idx > 1) {
                name += "[" + idx + "]";
            }
            path = "/" + name + path;
        }
        return path;
    }

    /**
     * Builds the XPath for the element `sel` in up to four stages, each one
     * narrowing the search to the part of the tree below the previous anchor:
     *   1. nearest self-or-ancestor carrying an id attribute,
     *   2. nearest one whose tag+class step resolves to it first,
     *   3. nearest one whose bare tag step resolves to it first,
     *   4. a positional path for whatever remains.
     * Note that stages 2 and 3 only check that the element is the FIRST match
     * of the candidate expression, not that it is the only match.
     */
    function calculateShortestXpathOfElement(sel) {
        var contextNode = sel.ownerDocument;
        var xpath = "";
        var stop = null;

        var stages = [
            // Read the attribute rather than node.id: the property can be
            // shadowed (e.g. a form containing a control named "id").
            function (node) {
                var id = node.getAttribute("id");
                if (!id) {
                    return null;
                }
                return "//" + node.tagName.toLowerCase() + "[@id=" + xpathLiteral(id) + "]";
            },
            function (node) {
                var styleClass = node.getAttribute("class");
                if (styleClass === null) {
                    return null;
                }
                var step = "//" + node.tagName.toLowerCase() + "[@class=" + xpathLiteral(styleClass) + "]";
                return firstMatch(xpath + step, contextNode) === node ? step : null;
            },
            function (node) {
                var step = "//" + node.tagName.toLowerCase();
                return firstMatch(xpath + step, contextNode) === node ? step : null;
            }
        ];

        for (var s = 0; s < stages.length; s++) {
            var anchor = findAnchor(sel, stop, stages[s]);
            if (anchor === null) {
                continue;
            }
            xpath += anchor.step;
            if (anchor.node === sel) {
                return xpath;
            }
            // Later stages only need to look below this anchor.
            stop = anchor.node;
        }

        return xpath + absolutePath(sel, stop);
    }

    /**
     * Returns the XPath for the deepest element containing the whole current
     * selection, or null when nothing is selected.
     */
    function calculateShortestXpathOfSelection() {
        var selection = window.getSelection();
        // getRangeAt(0) throws on an empty selection, so test rangeCount first.
        if (!selection || selection.rangeCount === 0) {
            return null;
        }
        var node = selection.getRangeAt(0).commonAncestorContainer;
        // Selections usually start/end in text nodes; step up to an element
        // so that the positional path stage has something to describe.
        while (node && node.nodeType !== 1) {
            node = node.parentNode;
        }
        if (!node) {
            return null;
        }
        return calculateShortestXpathOfElement(node);
    }

    var xpath = calculateShortestXpathOfSelection();
    var node = xpath ? firstMatch(xpath, document) : null;
    if (!node) {
        alert("Could not determine generic xpath for selection");
        return;
    }

    var border = node.style.border || "";
    node.style.border = "2px dashed red";
    try {
        prompt("Most generic xpath for selection:", xpath);
    } finally {
        // Always put the page back the way it was.
        node.style.border = border;
    }
}

selectionxpath();