# frozen_string_literal: true

# Extracts search terms from an nginx access log and writes two JSON files:
#
# * RESULT_FILE          - every log line matching SEARCH_REGEXP, with its term
# * total_searches.json  - number of searches per term, most frequent first
#
# Pass -s or --skip-log-parse to reuse an existing RESULT_FILE instead of
# parsing the access log again.

require 'json'
require 'uri'
require_relative 'application_nginx_log_parser'

# Monotonic clock: the measured duration is immune to wall-clock adjustments.
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

RESULT_FILE = 'nginx.search.results.json'

# The last capture group holds the raw (still URL-encoded) value of `q`.
# NOTE(review): the pattern requires an `&` after the `q` value, so a request
# where `q` is the final query parameter may not be captured as intended.
# Left unchanged because how `parse_matching` applies it is not visible here.
SEARCH_REGEXP = /search\/questions\?(.*?)((q\=(.*?)\&))/i

# Parses the access log and stores every matching line, together with its
# search term, in RESULT_FILE as a single JSON array.
#
# @return [Integer] number of bytes written to RESULT_FILE
def parse_file
  match_results = []

  # Without a second parameter the default nginx log format is assumed.
  parser = Application::NginxLogParser.new('nginx.access.log')

  # It is possible to use a custom regexp to read line by line:
  # parser = Application::NginxLogParser.new('nginx.access.log', /(.*)/)

  # Parse the log and collect the lines matching the regexp.
  parser.parse_matching(SEARCH_REGEXP) do |parsed_line, matches|
    print "\r#{parser.current_line}/#{parser.total_lines} = #{parser.percent_read}%"
    match_results << { search: matches[-1] }.merge(parsed_line)
  end

  # Compact JSON; binary mode keeps the bytes identical on every platform.
  File.binwrite(RESULT_FILE, JSON.generate(match_results))
end

# @return [Boolean] true when the user asked to reuse the cached RESULT_FILE
def skip_log_file_parse?
  ARGV.include?('-s') || ARGV.include?('--skip-log-parse')
end

# Decodes a URL-encoded query value ("+" and "%20" both become a space).
#
# URI.decode was removed in Ruby 3.0. URI.decode_www_form_component is the
# form-encoding aware replacement, but it raises on malformed escapes, which
# do occur in real access logs, so those terms are kept mostly raw.
#
# @param raw [String] the encoded search term
# @return [String] the decoded term, always valid in its encoding
def decode_search_term(raw)
  URI.decode_www_form_component(raw).scrub
rescue ArgumentError
  raw.tr('+', ' ')
end

# Counts the searches per decoded term.
#
# Terms are decoded before grouping so that "a+b" and "a%20b" are counted as
# the same search instead of producing two rows with the same label.
#
# @param records [Array<Hash>] parsed RESULT_FILE entries with a "search" key
# @return [Array<Hash>] `{ search:, total: }` entries, least frequent first
def summarize_searches(records)
  records
    .group_by { |record| decode_search_term(record['search']) }
    .map { |term, hits| { search: term, total: hits.size } }
    .sort_by { |entry| entry[:total] }
end

parse_file unless skip_log_file_parse?

# Only reachable with the skip flag: fail with a hint rather than a backtrace.
unless File.exist?(RESULT_FILE)
  abort "#{RESULT_FILE} not found; run once without -s/--skip-log-parse"
end

# Group the data by term.
sorted = summarize_searches(JSON.parse(File.read(RESULT_FILE)))

# Human-readable report of the searches per term, most frequent first.
report = {
  total_searches: sorted.sum { |entry| entry[:total] },
  report: sorted.reverse
}
File.binwrite('total_searches.json', JSON.pretty_generate(report))

runtime = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
puts format("\nExecuted in %s seconds", runtime)