Skip to content

Instantly share code, notes, and snippets.

@CooperLuan
Last active August 29, 2015 13:57
Show Gist options
  • Select an option

  • Save CooperLuan/9884170 to your computer and use it in GitHub Desktop.

Select an option

Save CooperLuan/9884170 to your computer and use it in GitHub Desktop.
flask example
# encoding: utf8
import logging
# Configure the root logger at INFO level before anything else runs, so the
# logging.info() call in the __main__ section of this script is emitted.
logging.basicConfig(level=logging.INFO)
from flask import make_response, jsonify
from flask import abort, redirect
from flask import Flask, request
# Flask application object; the '/alg' route below is registered on it and
# the __main__ section starts it with app.run().
app = Flask('info-center')
@app.route('/alg')
def alg():
    """Echo the ``x`` and ``y`` query-string parameters back as JSON.

    Both parameters are looked up with ``request.args[...]``, so each one is
    required. NOTE(review): Flask normally turns a missing key here into a
    400 response -- confirm for the Flask version in use.
    """
    query = request.args
    return jsonify({'x': query['x'], 'y': query['y']})
if __name__ == "__main__":
    # Script entry point: log the local URL, then start the Flask server.
    port = 5701
    logging.info('server http://127.0.0.1:%s/', port)
    # NOTE(review): host='0.0.0.0' combined with debug=True is being passed
    # here; Flask's documentation warns against exposing debug mode beyond a
    # trusted machine -- confirm this is intended for local development only.
    app.run(debug=True, port=port, host='0.0.0.0')
# encoding: utf8
"""
Extract fields according to a JSON-format configuration.
1. What if regex no. 1 doesn't work?
2. Clean up the code.
3. Covers only the E (extract) step of ETL.
"""
import requests
from lxml import etree
# Request headers passed to requests.get() below; the User-Agent value is a
# desktop Chrome identification string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36'
    ),
}
class xpath(object):
    """Matcher that wraps a single XPath expression.

    The expression is stored unchanged on ``self.exp``; ``parse_xpath``
    passes it to ``tree.xpath()``.
    """

    def __init__(self, exp):
        # exp: the XPath expression string to evaluate against a tree.
        self.exp = exp

    def __repr__(self):
        # Keeps configuration dicts readable when printed or logged.
        return '%s(%r)' % (type(self).__name__, self.exp)
class which(object):
    """Matcher that groups several alternative matchers.

    The positional arguments are stored, in order, as the tuple
    ``self.options``; ``parse_which`` walks them and returns the first
    non-empty result.
    """

    def __init__(self, *args):
        # args: the alternative matchers, in the order they should be tried.
        self.options = args

    def __repr__(self):
        # Keeps configuration dicts readable when printed or logged.
        inner = ', '.join(repr(option) for option in self.options)
        return '%s(%s)' % (type(self).__name__, inner)
# Extraction configuration consumed by parse_conf(): output field name ->
# matcher describing where to find the value in the parsed page.
extracted = {
    # Single expression: text of the span nested in the title span.
    'title': xpath("//span[@id='btAsinTitle']/span/text()"),
    # Alternatives, tried in the order listed: the list-price span first,
    # then the actual-price span.
    'market_price': which(
        xpath("//span[@id='listPriceValue']/text()"),
        xpath("//span[@id='actualPriceValue']/b/text()"),
    ),
}
def parse_xpath(tree, match):
    """Return the stripped first result of ``match.exp`` on *tree*.

    ``tree.xpath(match.exp)`` is evaluated once. ``None`` is returned when
    the result is empty or when its first item is empty after ``strip()``.
    """
    results = tree.xpath(match.exp)
    if not results:
        return None
    first = results[0].strip()
    if first:
        return first
    return None
def parse_which(tree, match):
    """Return the first non-empty ``parse_xpath`` result among the options.

    ``match.options`` is walked in order and evaluation stops at the first
    truthy result. ``None`` is returned when no option yields one.
    """
    # Generator keeps the evaluation lazy: later options are not parsed once
    # an earlier one has produced a value.
    candidates = (parse_xpath(tree, option) for option in match.options)
    return next((value for value in candidates if value), None)
def _printable(obj):
    """Return *obj* as the interpreter's native ``str`` for ``print``."""
    if isinstance(obj, str):
        return obj
    if isinstance(obj, bytes):
        # Only reachable on Python 3 (on Python 2 ``bytes`` is ``str``).
        return obj.decode('utf-8')
    text = u'%s' % (obj,)
    # On Python 2 ``text`` is ``unicode``; encode it so printing does not
    # depend on the encoding of stdout.
    return text if isinstance(text, str) else text.encode('utf-8')


def parse_conf(tree, conf):
    """Print one ``key value`` line for every entry of *conf*.

    Parameters:
        tree: parsed document handed to ``parse_xpath`` / ``parse_which``.
        conf: mapping of field name to an ``xpath`` or ``which`` matcher.

    A field whose matcher finds nothing is printed with the value ``None``
    rather than raising ``AttributeError`` on ``None.encode``. An entry
    whose value is neither an ``xpath`` nor a ``which`` instance is reported
    the same way. The single-argument ``print(...)`` form works on both
    Python 2 and Python 3.
    """
    for key, matcher in conf.items():
        if isinstance(matcher, which):
            value = parse_which(tree, matcher)
        elif isinstance(matcher, xpath):
            value = parse_xpath(tree, matcher)
        else:
            # Unsupported matcher type: there is nothing to extract.
            value = None
        print('%s %s' % (_printable(key), _printable(value)))
# Product page used as sample input for the extraction configuration.
amazon_iurl = 'http://www.amazon.cn/Rogue-American-Amber-Ale-%E7%BD%97%E6%A0%BC%E7%90%A5%E7%8F%80%E5%95%A4%E9%85%92-355ml/dp/B00E0HEADM/ref=sr_1_8?s=alcohol&ie=UTF8&qid=1397732156&sr=1-8'
# Without a timeout requests.get() can block indefinitely on a stalled
# connection.
resp = requests.get(amazon_iurl, headers=headers, timeout=30)
# Fail with a clear HTTP error instead of parsing an error page.
resp.raise_for_status()
# resp.encoding can be None when the response headers give no usable
# charset; decode(None) would raise TypeError, so fall back to UTF-8.
html = resp.content.decode(resp.encoding or 'utf-8')
tree = etree.HTML(html)
parse_conf(tree, extracted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment