Skip to content

Instantly share code, notes, and snippets.

@cooldaemon
Created September 11, 2009 02:17
Show Gist options
  • Select an option

  • Save cooldaemon/185012 to your computer and use it in GitHub Desktop.

Select an option

Save cooldaemon/185012 to your computer and use it in GitHub Desktop.

Revisions

  1. cooldaemon revised this gist Sep 11, 2009. 5 changed files with 162 additions and 0 deletions.
    19 changes: 19 additions & 0 deletions DDXMLDocument+HTML.h
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,19 @@
    #import <Foundation/Foundation.h>
    #import <libxml/HTMLparser.h>
    #import "DDXMLDocument.h"

    /**
     * Initializers that build a DDXMLDocument from HTML or XML input using
     * libxml2's in-memory readers.
     *
     * Every initializer returns nil on failure. When `error` is non-NULL it then
     * receives an NSError in the @"DDXMLErrorDomain" domain: code 0 when the
     * input is nil or empty, code 1 when no document could be produced.
     */
    @interface DDXMLDocument (HTML)

    /**
     * Encodes `string` as UTF-8 and parses it as HTML.
     *
     * @param string  HTML source text.
     * @param options Parser option bits handed to libxml2's HTML reader
     *                (e.g. HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING).
     * @param error   Optional out-parameter; written only on failure.
     */
    - (id)initWithHTMLString:(NSString *)string
                     options:(NSUInteger)options
                       error:(NSError **)error;

    /**
     * Parses `data` as HTML.
     *
     * @param data    Raw HTML bytes.
     * @param options Parser option bits handed to libxml2's HTML reader.
     * @param error   Optional out-parameter; written only on failure.
     */
    - (id)initWithHTMLData:(NSData *)data
                   options:(NSUInteger)options
                     error:(NSError **)error;

    /**
     * Parses `data` as XML.
     *
     * NOTE(review): if DDXMLDocument itself already declares this selector, the
     * category implementation takes its place -- confirm that is intended.
     *
     * @param data    Raw XML bytes.
     * @param options Parser option bits handed to libxml2's XML reader
     *                (e.g. XML_PARSE_RECOVER).
     * @param error   Optional out-parameter; written only on failure.
     */
    - (id)initWithData:(NSData *)data
               options:(NSUInteger)options
                 error:(NSError **)error;

    @end
    95 changes: 95 additions & 0 deletions DDXMLDocument+HTML.m
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,95 @@
    #import "DDXMLDocument+HTML.h"
    #import "DDXMLPrivate.h"

    // Storage type used for the document-kind selector declared below.
    typedef NSUInteger DocumentContent;

    // Kinds of input the shared initializer knows how to parse.
    enum {
        XMLDocument  = 0,  // parse with the XML reader
        HTMLDocument = 1   // parse with the HTML reader
    };

    @implementation DDXMLDocument (HTML)

    /**
     * Writes an NSError with the given code (domain @"DDXMLErrorDomain", no
     * userInfo) through `error`. Does nothing when the caller passed NULL.
     *
     * @param error Optional out-parameter supplied by the caller.
     * @param code  Error code to report (0 = no input, 1 = no document produced).
     */
    - (void)setError:(NSError **)error code:(NSInteger)code
    {
        if (error == NULL) {
            return;
        }

        *error = [NSError errorWithDomain:@"DDXMLErrorDomain"
                                     code:code
                                 userInfo:nil];
    }

    /**
     * Shared initializer: parses `data` with libxml2's HTML or XML in-memory
     * reader, depending on `content`, and wraps the resulting document.
     *
     * On every failure path the receiver is released and nil is returned
     * (manual retain/release code).
     *
     * @param data    Bytes to parse. nil or empty input fails with code 0.
     * @param content HTMLDocument selects htmlReadMemory; any other value
     *                selects xmlReadMemory.
     * @param options Parser option bits, forwarded to libxml2.
     * @param error   Optional out-parameter; written only on failure. Code 1
     *                means libxml2 returned no document, or the input was too
     *                large to hand to libxml2.
     * @return The initialized document, or nil on failure.
     */
    - (id)initWithData:(NSData *)data
               content:(DocumentContent)content
               options:(NSUInteger)options
                 error:(NSError **)error
    {
        // Messaging nil yields 0, so this single test covers nil and empty data.
        NSUInteger length = [data length];
        if (length == 0) {
            [self setError:error code:0];
            [self release];
            return nil;
        }

        // libxml2's readers take the buffer size as an int. Refuse oversized
        // input instead of letting the length be silently truncated.
        if (length > (NSUInteger)INT_MAX) {
            [self setError:error code:1];
            [self release];
            return nil;
        }

        // Changes libxml2's default blank-node handling (not a per-document
        // setting), so it also affects later parses.
        xmlKeepBlanksDefault(0);

        const char *bytes = (const char *)[data bytes];
        int size = (int)length;
        int parserOptions = (int)options;

        xmlDocPtr doc = NULL;
        if (content == HTMLDocument) {
            doc = htmlReadMemory(bytes, size, "", NULL, parserOptions);
        } else {
            doc = xmlReadMemory(bytes, size, "", NULL, parserOptions);
        }

        if (doc == NULL) {
            [self setError:error code:1];
            [self release];
            return nil;
        }

        return [self initWithCheckedPrimitive:(xmlKindPtr)doc];
    }

    /**
     * Encodes `string` as UTF-8 and parses the bytes as HTML.
     * A nil string produces nil data and therefore fails with code 0.
     */
    - (id)initWithHTMLString:(NSString *)string
                     options:(NSUInteger)options
                       error:(NSError **)error
    {
        NSData *utf8Data = [string dataUsingEncoding:NSUTF8StringEncoding];
        return [self initWithHTMLData:utf8Data options:options error:error];
    }

    /**
     * Parses `data` as HTML via the shared initializer.
     */
    - (id)initWithHTMLData:(NSData *)data
                   options:(NSUInteger)options
                     error:(NSError **)error
    {
        return [self initWithData:data
                          content:HTMLDocument
                          options:options
                            error:error];
    }

    /**
     * Parses `data` as XML via the shared initializer.
     *
     * NOTE(review): if DDXMLDocument itself already implements this selector,
     * this category method takes its place -- confirm that is intended.
     */
    - (id)initWithData:(NSData *)data
               options:(NSUInteger)options
                 error:(NSError **)error
    {
        return [self initWithData:data
                          content:XMLDocument
                          options:options
                            error:error];
    }

    @end
    8 changes: 8 additions & 0 deletions DDXMLNode+HTML.h
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,8 @@
    #import <Foundation/Foundation.h>
    #import "DDXMLNode.h"

    /**
     * Document-node detection that also recognizes HTML documents.
     */
    @interface DDXMLNode (HTML)

    /**
     * Reports whether `kindPtr` refers to a document node.
     *
     * @param kindPtr Node whose `type` field is inspected.
     * @return YES when the type is XML_DOCUMENT_NODE or XML_HTML_DOCUMENT_NODE.
     */
    + (BOOL)isXmlDocPtr:(xmlKindPtr)kindPtr;

    @end
    11 changes: 11 additions & 0 deletions DDXMLNode+HTML.m
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,11 @@
    #import "DDXMLNode+HTML.h"

    @implementation DDXMLNode (HTML)

    /**
     * Reports whether `kindPtr` refers to a document node, counting both XML
     * and HTML documents.
     *
     * NOTE(review): if DDXMLNode itself already implements this selector, this
     * category method takes its place -- confirm that is intended.
     *
     * @param kindPtr Node whose `type` field is inspected; may be NULL.
     * @return YES when the type is XML_DOCUMENT_NODE or XML_HTML_DOCUMENT_NODE,
     *         NO otherwise (including for a NULL pointer).
     */
    + (BOOL)isXmlDocPtr:(xmlKindPtr)kindPtr
    {
        // A NULL pointer is not a document; avoid dereferencing it.
        if (kindPtr == NULL) {
            return NO;
        }

        return kindPtr->type == XML_DOCUMENT_NODE
            || kindPtr->type == XML_HTML_DOCUMENT_NODE;
    }

    @end
    29 changes: 29 additions & 0 deletions README.markdown
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    ## KissXML+HTML
    Adds HTML-parsing methods to [KissXML](http://code.google.com/p/kissxml/ "KissXML").

    ### How to Use
    #import <Foundation/Foundation.h>
    #import "DDXML+HTML.h"

    NSError *error = nil;

    // html
    NSXMLDocument *htmlDocument = [[DDXMLDocument alloc]
    initWithHTMLData:htmlData
    options:HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR
    error:&error
    ];

    // xml
    NSXMLDocument *xmlDocument = [[DDXMLDocument alloc]
    initWithData:htmlData
    options:XML_PARSE_RECOVER
    error:&error
    ];

    // xpath
    NSArray *array = [htmlDocument
    nodesForXPath:@"id(\"maincol\")/div[@class=\"content\"]/h2/following-sibling::node()[not(./preceding-sibling::node()/descendant-or-self::div[@class=\"posted\"])]"
    error:&error
    ];

  2. cooldaemon created this gist Sep 11, 2009.
    3 changes: 3 additions & 0 deletions DDXML+HTML.h
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,3 @@
    // Umbrella header: KissXML plus the HTML categories.
    #import "DDXML.h"
    #import "DDXMLNode+HTML.h"
    #import "DDXMLDocument+HTML.h"