The following gist is an extract from the article "Building a simple crawler". It crawls starting from a given URL, for a given number of bounces.
from crawler import Crawler
crawler = Crawler()
crawler.crawl('http://techcrunch.com/')
| import math | |
| import Image | |
| import Levenshtein | |
| class BWImageCompare(object): | |
| """Compares two images (b/w).""" | |
| _pixel = 255 |
| #! /usr/bin/python3 | |
| import argparse | |
| from operator import attrgetter | |
| import os | |
| import sys | |
| from github import Github | |
| def main(): |
The following gist is an extract from the article "Building a simple crawler". It crawls starting from a given URL, for a given number of bounces.
from crawler import Crawler
crawler = Crawler()
crawler.crawl('http://techcrunch.com/')
| #!/usr/bin/env python | |
| import numpy | |
| import sys | |
| import timeit | |
| try: | |
| import numpy.core._dotblas | |
| print 'FAST BLAS' | |
| except ImportError: | |
| print 'slow blas' |
| from slacker import Slacker | |
| import json | |
| import argparse | |
| import os | |
| # This script finds all channels, private channels and direct messages | |
| # that your user participates in, downloads the complete history for | |
| # those conversations and writes each conversation out to separate json files. | |
| # | |
| # This user-centric history gathering is nice because the official Slack data exporter | |
| # only exports public channels. |
I hereby claim:
To claim this, I am signing this object:
| import javax.crypto.Cipher; | |
| class Test { | |
| public static void main(String[] args) { | |
| try { | |
| System.out.println("Hello World!"); | |
| int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES"); | |
| System.out.println(maxKeyLen); | |
| } catch (Exception e){ | |
| System.out.println("Sad world :("); |
| -- This is a Hive program. Hive is an SQL-like language that compiles | |
| -- into Hadoop Map/Reduce jobs. It's very popular among analysts at | |
| -- Facebook, because it allows them to query enormous Hadoop data | |
| -- stores using a language much like SQL. | |
| -- Our logs are stored on the Hadoop Distributed File System, in the | |
| -- directory /logs/randomhacks.net/access. They're ordinary Apache | |
| -- logs in *.gz format. | |
| -- | |
| -- We want to pretend that these gzipped log files are a database table, |
| #!/usr/bin/env bash | |
| # script: watch | |
| # author: Mike Smullin <mike@smullindesign.com> | |
| # license: GPLv3 | |
| # description: | |
| # watches the given path for changes | |
| # and executes a given command when changes occur | |
| # usage: | |
| # watch <path> <cmd...> | |
| # |