Created
March 16, 2010 07:51
-
-
Save planbnet/333736 to your computer and use it in GitHub Desktop.
Revisions
-
planbnet revised this gist
Mar 16, 2010 . 3 changed files with 146 additions and 138 deletions. There are no files selected for viewing
// Bookmarklet to calculate the most generic xpath for the current selection
// (helper utility for scraping websites)
//
// Strategy: anchor the path on the nearest ancestor with an id, then on the
// nearest element that is the first match for tag+class, then on the nearest
// element that is the first match for its tag alone, and finally spell out the
// remaining steps down to the selected element as a positional child path.
function selectionxpath() {

  // Quotes a string as an XPath 1.0 string literal. XPath has no escape
  // character, so a value containing both quote kinds is assembled with concat().
  function xpathLiteral(value) {
    if (value.indexOf("'") === -1) return "'" + value + "'";
    if (value.indexOf('"') === -1) return '"' + value + '"';
    return "concat('" + value.split("'").join("', \"'\", '") + "')";
  }

  // Returns the first node (in document order) selected by `expression`
  // when evaluated against `doc`, or null when nothing matches.
  function firstMatch(doc, expression) {
    return doc.evaluate(expression, doc, null,
        XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
  }

  // Step builder: "//tag[@class='...']" for elements that carry a class
  // attribute (an empty attribute counts), null otherwise.
  function classStep(el) {
    var styleClass = el.getAttribute("class");
    if (styleClass === null) return null;
    return "//" + el.tagName.toLowerCase() + "[@class=" + xpathLiteral(styleClass) + "]";
  }

  // Step builder: "//tag".
  function tagStep(el) {
    return "//" + el.tagName.toLowerCase();
  }

  // Walks from `from` towards the root, stopping before `limit` (null = walk
  // to the top). Returns { node, step } for the first element whose step,
  // appended to `prefix`, selects that very element as its first match;
  // returns null when no such element exists.
  // NOTE: "first match" is what is tested, not "only match" - the resulting
  // expression may still select further nodes later in the document.
  function findAnchor(from, limit, prefix, buildStep) {
    for (var node = from; node && node !== limit; node = node.parentNode) {
      if (node.nodeType !== 1) continue;
      var step = buildStep(node);
      // The probe uses exactly the text that will be returned (lower-case tag).
      if (step !== null && firstMatch(node.ownerDocument, prefix + step) === node) {
        return { node: node, step: step };
      }
    }
    return null;
  }

  // Builds the xpath for `sel` (any DOM node). Returns the expression as a
  // string, or null when `sel` cannot be resolved to an element.
  function calculateShortestXpathOfElement( sel ) {
    // Selections normally sit in text nodes; work from the nearest element so
    // the positional path below is not silently skipped.
    var target = sel;
    while (target && target.nodeType !== 1) {
      target = target.nodeType === 9 ? target.documentElement : target.parentNode;
    }
    if (!target) return null;

    var xpath = "";
    var stop = null;
    var node;

    // find next element with an id
    // (getAttribute avoids form.id being shadowed by a child control named "id")
    for (node = target; node; node = node.parentNode) {
      if (node.nodeType === 1 && node.getAttribute("id")) break;
    }
    if (node) {
      xpath = "//" + node.tagName.toLowerCase() + "[@id=" + xpathLiteral(node.getAttribute("id")) + "]";
      if (node === target) return xpath;
      stop = node;
    }

    // find next element with first-matching tag+class, then first-matching tag;
    // each hit becomes the new upper bound for the following search
    var builders = [classStep, tagStep];
    for (var k = 0; k < builders.length; k++) {
      var hit = findAnchor(target, stop, xpath, builders[k]);
      if (hit) {
        xpath += hit.step;
        if (hit.node === target) return xpath;
        stop = hit.node;
      }
    }

    // get absolute path for the rest: child steps from the last anchor (or the
    // document root when there is none) down to the target. An index is only
    // written for the 2nd+ same-tag sibling, keeping the path generic.
    var restPath = "";
    for (node = target; node && node.nodeType === 1 && node !== stop; node = node.parentNode) {
      var idx = 1;
      for (var sib = node.previousSibling; sib; sib = sib.previousSibling) {
        if (sib.nodeType === 1 && sib.tagName === node.tagName) idx++;
      }
      var xname = node.tagName.toLowerCase();
      if (idx > 1) xname += "[" + idx + "]";
      restPath = "/" + xname + restPath;
    }

    return xpath + restPath;
  }

  // Returns the xpath for the deepest node containing the whole first range of
  // the current selection, or null when nothing is selected.
  function calculateShortestXpathOfSelection() {
    var selection = window.getSelection();
    // getRangeAt(0) throws when there is no range, so test rangeCount first.
    if (!selection || selection.rangeCount === 0) return null;
    return calculateShortestXpathOfElement(selection.getRangeAt(0).commonAncestorContainer);
  }

  var xpath = calculateShortestXpathOfSelection();
  if (!xpath) {
    alert("Could not determine generic xpath for selection");
    return;
  }

  // Highlight what the expression resolves to while the prompt is open, then
  // put the previous inline border back even if prompt() fails.
  var node = document.evaluate(xpath, document, null,
      XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
  var canHighlight = !!(node && node.style);
  var border = canHighlight ? (node.style.border || "") : "";
  if (canHighlight) node.style.border = "2px dashed red";
  try {
    prompt("Most generic xpath for selection:", xpath);
  } finally {
    if (canHighlight) node.style.border = border;
  }
}

selectionxpath();
To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1 @@ <html><body><p><a href="javascript:function%20selectionxpath()%20{function%20calculateShortestXpathOfElement(%20sel%20)%20{var%20node%20=%20sel;var%20nextId%20=%20null;var%20stop%20=%20null;var%20xpath%20=%20%22%22;while%20(true)%20{if%20(node.id%20&&%20node.id%20!=%20%22%22)%20{nextId%20=%20node.id;break;}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextId%20!=%20null)%20{xpath%20=%20%22//%22%20+%20node.tagName.toLowerCase()%20+%20%22[@id=%27%22%20+%20nextId%20+%20%22%27]%22;if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}node%20=%20sel;var%20nextUniqueClass%20=%20null;while%20(true)%20{if%20(node.nodeType%20===%201)%20{var%20styleClass%20=%20node.getAttribute(%22class%22);if%20(styleClass%20!=%20null)%20{var%20tmpXpath%20=%20xpath+%22//%22+node.tagName+%22[@class=%27%22+styleClass+%22%27]%22;var%20tempResult%20=%20document.evaluate(tmpXpath,%20sel.ownerDocument,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null);if%20(tempResult.singleNodeValue%20==%20node)%20{nextUniqueClass%20=%20styleClass;break;}}}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextUniqueClass%20!=%20null)%20{xpath%20+=%20%22//%22+node.tagName.toLowerCase()+%22[@class=%27%22+nextUniqueClass+%22%27]%22;if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}node%20=%20sel;var%20nextUniqueTag%20=%20null;while%20(true)%20{if%20(node.nodeType%20===%201)%20{var%20tmpXpath%20=%20xpath+%22//%22+node.tagName;var%20tempResult%20=%20document.evaluate(tmpXpath,%20sel.ownerDocument,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null);if%20(tempResult.singleNodeValue%20==%20node)%20{nextUniqueTag%20=%20node.tagName;break;}}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextUniqueTag%20!=%20null)%
20{xpath%20+=%20%22//%22+node.tagName.toLowerCase();if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}var%20restPath%20=%20%22%22;for%20(node%20=%20sel;%20node%20&&%20node.nodeType%20==%201;%20node%20=%20node.parentNode)%20{if%20(node%20==%20stop)%20break;var%20idx%20=%201;for%20(var%20sib%20=%20node.previousSibling;%20sib%20;%20sib%20=%20sib.previousSibling)%20{if(sib.nodeType%20==%201%20&&%20sib.tagName%20==%20node.tagName)%20idx++;}var%20xname%20=%20node.tagName.toLowerCase();if%20(idx%20>%201)%20xname%20+=%20%22[%22%20+%20idx%20+%20%22]%22;restPath%20=%20%22/%22%20+%20xname%20+%20restPath;}var%20result%20=%20xpath%20+%20restPath;return%20result;}function%20depthOf(%20el%20)%20{i%20=%200;while%20(el)%20{el%20=%20el.parentNode;i++;}return%20i;}function%20calculateShortestXpathOfSelection()%20{var%20sel%20=%20window.getSelection().getRangeAt(0);if%20(!sel)%20return%20null;var%20start%20=%20sel.startContainer;var%20end%20=%20sel.endContainer;var%20i%20=%20depthOf(%20start%20);var%20j%20=%20depthOf(%20end%20);while%20(start%20!=%20end%20&&%20i%20!=%200%20&&%20j%20!=%200)%20{if%20(i%20>%20j)%20{start%20=%20start.parentNode;i--;}%20else%20{end%20=%20end.parentNode;j--;}}return%20calculateShortestXpathOfElement(start);}var%20xpath%20=%20calculateShortestXpathOfSelection();var%20node%20=%20document.evaluate(xpath,%20document,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null).singleNodeValue;var%20border%20=%20node.style.border;if%20(!border)%20border%20=%20%22%22;node.style.border%20=%20%222px%20dashed%20red%22;if%20(xpath)%20{prompt(%22Most%20generic%20xpath%20for%20selection:%22,%20xpath);node.style.border%20=%20border;}%20else%20{alert(%22Could%20not%20determine%20generic%20xpath%20for%20selection%22);}}selectionxpath();">Selection XPath</a></p><p/><p>Drag the link to your bookmark bar</p></body></html> -
planbnet created this gist
Mar 16, 2010 . There are no files selected for viewing
// Bookmarklet: computes a generic xpath for the current text selection and
// shows it in a prompt so it can be copied (helper for scraping websites).
//
// Strategy: anchor the path on the nearest ancestor with an id, then on the
// nearest element that is the first match for tag+class, then on the nearest
// element that is the first match for its tag alone, and finally spell out the
// remaining steps down to the selected element as a positional child path.
function selectionxpath() {

  // Quotes a string as an XPath 1.0 string literal. XPath has no escape
  // character, so a value containing both quote kinds is assembled with concat().
  function xpathLiteral(value) {
    if (value.indexOf("'") === -1) return "'" + value + "'";
    if (value.indexOf('"') === -1) return '"' + value + '"';
    return "concat('" + value.split("'").join("', \"'\", '") + "')";
  }

  // Returns the first node (in document order) selected by `expression`
  // when evaluated against `doc`, or null when nothing matches.
  function firstMatch(doc, expression) {
    return doc.evaluate(expression, doc, null,
        XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
  }

  // Step builder: "//tag[@class='...']" for elements that carry a class
  // attribute (an empty attribute counts), null otherwise.
  function classStep(el) {
    var styleClass = el.getAttribute("class");
    if (styleClass === null) return null;
    return "//" + el.tagName.toLowerCase() + "[@class=" + xpathLiteral(styleClass) + "]";
  }

  // Step builder: "//tag".
  function tagStep(el) {
    return "//" + el.tagName.toLowerCase();
  }

  // Walks from `from` towards the root, stopping before `limit` (null = walk
  // to the top). Returns { node, step } for the first element whose step,
  // appended to `prefix`, selects that very element as its first match;
  // returns null when no such element exists.
  // NOTE: "first match" is what is tested, not "only match" - the resulting
  // expression may still select further nodes later in the document.
  function findAnchor(from, limit, prefix, buildStep) {
    for (var node = from; node && node !== limit; node = node.parentNode) {
      if (node.nodeType !== 1) continue;
      var step = buildStep(node);
      // The probe uses exactly the text that will be returned (lower-case tag).
      if (step !== null && firstMatch(node.ownerDocument, prefix + step) === node) {
        return { node: node, step: step };
      }
    }
    return null;
  }

  // Builds the xpath for `sel` (any DOM node). Returns the expression as a
  // string, or null when `sel` cannot be resolved to an element.
  function calculateShortestXpathOfElement( sel ) {
    // Selections normally sit in text nodes; work from the nearest element so
    // the positional path below is not silently skipped.
    var target = sel;
    while (target && target.nodeType !== 1) {
      target = target.nodeType === 9 ? target.documentElement : target.parentNode;
    }
    if (!target) return null;

    var xpath = "";
    var stop = null;
    var node;

    // find next element with an id
    // (getAttribute avoids form.id being shadowed by a child control named "id")
    for (node = target; node; node = node.parentNode) {
      if (node.nodeType === 1 && node.getAttribute("id")) break;
    }
    if (node) {
      xpath = "//" + node.tagName.toLowerCase() + "[@id=" + xpathLiteral(node.getAttribute("id")) + "]";
      if (node === target) return xpath;
      stop = node;
    }

    // find next element with first-matching tag+class, then first-matching tag;
    // each hit becomes the new upper bound for the following search
    var builders = [classStep, tagStep];
    for (var k = 0; k < builders.length; k++) {
      var hit = findAnchor(target, stop, xpath, builders[k]);
      if (hit) {
        xpath += hit.step;
        if (hit.node === target) return xpath;
        stop = hit.node;
      }
    }

    // get absolute path for the rest: child steps from the last anchor (or the
    // document root when there is none) down to the target. An index is only
    // written for the 2nd+ same-tag sibling, keeping the path generic.
    var restPath = "";
    for (node = target; node && node.nodeType === 1 && node !== stop; node = node.parentNode) {
      var idx = 1;
      for (var sib = node.previousSibling; sib; sib = sib.previousSibling) {
        if (sib.nodeType === 1 && sib.tagName === node.tagName) idx++;
      }
      var xname = node.tagName.toLowerCase();
      if (idx > 1) xname += "[" + idx + "]";
      restPath = "/" + xname + restPath;
    }

    return xpath + restPath;
  }

  // Returns the xpath for the deepest node containing the whole first range of
  // the current selection, or null when nothing is selected.
  function calculateShortestXpathOfSelection() {
    var selection = window.getSelection();
    // getRangeAt(0) throws when there is no range, so test rangeCount first.
    if (!selection || selection.rangeCount === 0) return null;
    return calculateShortestXpathOfElement(selection.getRangeAt(0).commonAncestorContainer);
  }

  var xpath = calculateShortestXpathOfSelection();
  if (xpath) {
    prompt("Most generic xpath for selection:", xpath);
  } else {
    alert("Could not determine generic xpath for selection");
  }
}

selectionxpath();