-
-
Save cash2one/60d1d714646bdcdbf853040bec57adae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- encoding: utf-8 -*- | |
| # Created on 2017-05-12 12:31:13 | |
| # Project: PVP | |
| from pyspider.libs.base_handler import * | |
| import json | |
| import re | |
| import pymongo | |
class Handler(BaseHandler):
    """pyspider handler that scrapes hero data from pvp.qq.com into MongoDB.

    Flow: ``on_start`` queues the hero list JSON, ``index_page`` queues one
    JS-rendered detail page per hero, ``detail_page`` extracts one record per
    hero, and ``on_result`` upserts that record into the ``heroes`` collection
    of the ``pvp`` database, keyed by the record's ``'ID'`` field.
    """

    crawl_config = {
    }

    # Template for a hero detail page; ``{id}`` is the hero's ``ename`` value
    # taken from the hero list JSON.
    hero_detail_url = 'http://pvp.qq.com/web201605/herodetail/{id}.shtml'

    # Hero category label, looked up by the last character of the class
    # attribute of the ``.herodetail-sort i`` element.
    # (warrior, mage, tank, assassin, marksman, support)
    type_map = {
        '1': '战士',
        '2': '法师',
        '3': '坦克',
        '4': '刺客',
        '5': '射手',
        '6': '辅助'
    }

    # Display name for each ID found in the ``data-skill`` attribute of
    # ``#skill3`` (stored in the record under '召唤师技能').
    zhsjn_map = {
        '80115': '闪现',
        '80108': '终结',
        '80102': '治疗术',
        '80107': '净化',
        '80103': '晕眩',
        '80121': '弱化',
        '80109': '疾跑',
        '80110': '狂暴',
        '80104': '惩击',
        '80105': '干扰'
    }

    # NOTE: the client is created while the class body is executed, so it is
    # shared by every Handler instance.
    client = pymongo.MongoClient('localhost')
    db = client['pvp']

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the hero list JSON; its response is handled by ``index_page``."""
        self.crawl('http://pvp.qq.com/web201605/js/herolist.json', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue one JS-rendered detail page for every hero in the list.

        ``response.text`` is parsed as a JSON array of objects; each object's
        ``ename`` value is substituted into ``hero_detail_url``.
        """
        heroes = json.loads(response.text)
        for hero in heroes:
            ename = hero.get('ename')
            if ename is None:
                # Without an ename the URL would become ".../None.shtml".
                continue
            url = self.hero_detail_url.format(id=ename)
            self.crawl(url, fetch_type='js', callback=self.detail_page)

    def find_number(self, text):
        """Return the first run of digits in ``text`` as a string.

        Returns ``None`` when ``text`` is ``None``/empty or contains no digit,
        so callers can pass the result of ``.attr(...)`` directly even when
        the attribute is absent.
        """
        if not text:
            return None
        result = re.search(r'(\d+)', text)
        if result:
            return result.group(1)
        return None

    def _split_attr(self, value):
        """Split a '|'-separated attribute value; a missing attribute gives []."""
        if value is None:
            return []
        return value.split('|')

    def _paragraph_texts(self, doc, selector):
        """Return the text of the ``p`` inside each element matched by ``selector``."""
        return [item.find('p').text() for item in doc(selector).items()]

    def _bar_score(self, doc, bar_class):
        """Return the number found in the style of the given score bar, or None."""
        selector = '.cover-list-bar.{0} .ibar'.format(bar_class)
        return self.find_number(doc(selector).attr('style'))

    @config(priority=2)
    def detail_page(self, response):
        """Extract one hero record from a rendered detail page and yield it.

        Fields whose source element or attribute is missing are stored as
        ``None`` (scalars) or ``[]`` (lists) instead of aborting the whole
        record.

        Raises:
            ValueError: if the hero ID cannot be extracted from
                ``response.url``; without it the record cannot be keyed.
        """
        doc = response.doc

        id_match = re.search(r'detail/(.*?)\.shtml', response.url)
        if id_match is None:
            raise ValueError(
                'cannot extract hero ID from URL: {0}'.format(response.url))
        hero_id = id_match.group(1)

        name = doc('h2.cover-name').text()
        title = doc('.cover-title').text()

        # The category is encoded as the last character of the class attribute.
        sort_class = doc('.herodetail-sort i').attr('class')
        hero_type = self.type_map.get(sort_class[-1]) if sort_class else None

        # The background image URL is embedded in the inline style as url('...').
        background = None
        box_style = doc('.pic-show-box').attr('style')
        if box_style:
            bg_match = re.search(r"url\('(.*?)'", box_style)
            if bg_match:
                background = bg_match.group(1)

        score_viability = self._bar_score(doc, 'bar1')
        score_attack = self._bar_score(doc, 'bar2')
        score_skill = self._bar_score(doc, 'bar3')
        score_difficulty = self._bar_score(doc, 'bar4')

        story = doc('.story .story-info .nr').text()
        history = doc('.history .story-info .nr').text()

        pf = [item.text() for item in doc('.pic-pf li p').items()]

        skills = []
        for item in doc('.skill-show .show-list').items():
            skills.append({
                '名称': item.find('.skill-btn').text(),
                '冷却值': self.find_number(item.find('.skill-p1').text()),
                '消耗': self.find_number(item.find('.skill-p2').text()),
                '介绍': item.find('.skill-p3').text()
            })

        mings = []
        for item in doc('.sugg-u1 li').items():
            ming_name = item.find('em').text()
            # The <em> is removed before reading the effect text; presumably
            # so the name is not repeated inside it.
            item.find('em').remove()
            mings.append({
                '名称': ming_name,
                '效果': item.find('p:not(:first-child)').text(),
            })

        add = {
            '主升': doc('.sugg-info2 span:nth-child(2) img').attr('alt'),
            '副升': doc('.sugg-info2 span:nth-child(4) img').attr('alt'),
        }

        zhs_skills = [
            {'ID': skill, '名称': self.zhsjn_map.get(skill)}
            for skill in self._split_attr(
                doc('.sugg-info2 #skill3').attr('data-skill'))
        ]

        equip_selector = '.equip .equip-info:nth-child({0}) .equip-list li'
        cz = {
            '前期': self._paragraph_texts(doc, equip_selector.format(2)),
            '中期': self._paragraph_texts(doc, equip_selector.format(3)),
            '后期': self._paragraph_texts(doc, equip_selector.format(4)),
        }

        relation_selector = '.hero.ls.fl .hero-info:nth-child({0}) .hero-list'
        gx = {
            '最佳搭档': self._split_attr(
                doc(relation_selector.format(2)).attr('data-relatename')),
            '压制英雄': self._split_attr(
                doc(relation_selector.format(3)).attr('data-relatename')),
            '被压制英雄': self._split_attr(
                doc(relation_selector.format(4)).attr('data-relatename')),
        }

        videos = [
            {
                '标题': item.find('.p1').text(),
                '链接': item.find('a').attr('href'),
                '发布时间': item.find('.play-time').text()
            }
            for item in doc('.video-info li').items()
        ]

        gl = [
            {
                '标题': item.find('.p-dec').text(),
                '链接': item.find('a').attr('href')
            }
            for item in doc('.strategy-info li').items()
        ]

        yield {
            'ID': hero_id,
            '名称': name,
            '头衔': title,
            '类型': hero_type,
            '背景图': background,
            '生存能力': score_viability,
            '攻击伤害': score_attack,
            '技能效果': score_skill,
            '上手难度': score_difficulty,
            '铭文搭配建议': mings,
            '技能加点建议': add,
            '召唤师技能': zhs_skills,
            '出装': cz,
            '视频': videos,
            '攻略': gl,
            '英雄关系': gx,
            '皮肤': pf,
            # Story, history and skills are part of the record so that the
            # extraction above is not thrown away.
            '英雄故事': story,
            '历史上的TA': history,
            '技能': skills
        }

    def on_result(self, result):
        """Upsert a yielded record into the ``heroes`` collection by its 'ID'.

        Empty results are ignored. This override does not call the parent
        class's ``on_result``, so records are written only to MongoDB here.
        """
        if not result:
            return
        # update_one(..., upsert=True) inserts the hero on first sight and
        # overwrites the stored fields on later crawls.
        self.db['heroes'].update_one(
            {'ID': result['ID']}, {'$set': dict(result)}, upsert=True)
        print('Saved to Mongo', result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.