import datetime
import json
import os

from scrapy import signals
from scrapy.exceptions import NotConfigured
from scrapy.utils.project import data_path


def json_serial(obj):
    """Serialize objects that the default JSON encoder cannot handle.

    Intended to be passed as the ``default=`` callable of ``json.dumps``.

    :param obj: the object the encoder could not serialize.
    :returns: the ISO 8601 string for ``datetime.datetime`` and
        ``datetime.date`` instances.
    :raises TypeError: for any other type.
    """
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    raise TypeError("Type %s not serializable" % type(obj))


class StatsStore(object):
    """Extension that writes a spider's stats to a JSON file when it closes.

    Files are written into the directory returned by
    ``data_path('stats', createdir=True)``; each file is named after the
    integer Unix timestamp taken at the moment the spider closed.
    """

    def __init__(self, stats):
        """Keep the stats collector and resolve the output directory.

        :param stats: object exposing ``get_stats(spider)``.
        """
        self.stats = stats
        self.statsdir = data_path('stats', createdir=True)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from *crawler* and hook ``spider_closed``.

        :param crawler: object providing ``stats`` and ``signals``.
        :returns: the new instance, already connected to the
            ``signals.spider_closed`` signal.
        """
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def spider_closed(self, spider):
        """Dump the stats of *spider* as JSON into the stats directory.

        :param spider: the spider whose stats are fetched via
            ``self.stats.get_stats(spider)``.
        :raises TypeError: if a stat value is neither JSON-native nor a
            date/datetime (propagated from ``json_serial``).
        """
        spider_stats = self.stats.get_stats(spider)

        # Serialize before touching the filesystem: if a value cannot be
        # encoded, the TypeError is raised without leaving an empty,
        # truncated file behind.
        payload = json.dumps(spider_stats, default=json_serial)

        # NOTE(review): the key has one-second resolution and the file is
        # opened in "w" mode, so two spiders closing within the same second
        # write to the same path and the later one overwrites the earlier.
        key = int(datetime.datetime.now().timestamp())
        stats_f = os.path.join(self.statsdir, str(key))

        # json.dumps escapes non-ASCII by default, so the explicit encoding
        # does not change the bytes written; it only removes the dependency
        # on the platform's locale encoding.
        with open(stats_f, "w", encoding="utf-8") as s_f:
            s_f.write(payload)