Skip to content

Instantly share code, notes, and snippets.

@epicserve
Last active December 22, 2015 10:09
Show Gist options
  • Select an option

  • Save epicserve/6456150 to your computer and use it in GitHub Desktop.

Select an option

Save epicserve/6456150 to your computer and use it in GitHub Desktop.
List the top pages for a website using Google Analytics
"""
List the top pages for a website using Google Analytics.
Installation
------------
1. Install the required python library::
pip install google-api-python-client==1.2
2. Go to https://code.google.com/apis/console/ to get your API credentials.
- Click create project
- Click the on/off button next to "Analytics API" to turn on
"Analytics API" service.
- Click on the "API Access" tab
- Click the "Create an OAuth 2.0 client ID" button
- Add a "Product name" and "Home Page URL" and then click next
- Select "Installed application"
- Click "Create client ID"
3. Download client_secrets.json and put it in the same directory as this script
Usage
-----
Before you run the script you'll need to get the profile id for the website
you want to get top pages for. Login to Google Analytics and then view the
analytics for the website you want the top pages for. The property ID will be
the number after the "p" in the URL.
If the URL was
https://www.google.com/analytics/web/?hl=en&pli=1#report/visitors-overview/a1212121w23232323p34343434/,
then your profile ID would be 34343434.
Run the script::
python list_ga_top_pages.py --profile_id 34343434 --start_date '2013-09-01' \
--end_date '2013-09-05' --filter '^/news/201*' --max_results 20
"""
from apiclient import discovery
from oauth2client import client
from oauth2client import file
from oauth2client import tools
import sys
import argparse
import httplib2
import os
__author__ = "epicserve@gmail.com (Brent O'Connor)"
def get_service(argv, name='analytics', version='v3', doc=None, client_secrets=None,
                scope='https://www.googleapis.com/auth/analytics.readonly', parents=None):
    """Authenticate via OAuth 2.0 and build a Google API service object.

    Args:
        argv: Full command-line argument list; argv[0] (the program name) is
            skipped before parsing.
        name: Google API name; also used as the credential cache file prefix.
        version: API version string passed to the discovery service.
        doc: Description text for the argument parser; defaults to the module
            docstring.
        client_secrets: Path to the OAuth client secrets JSON file; defaults
            to 'client_secrets.json' next to this script.
        scope: OAuth scope. If explicitly passed as None, falls back to
            'https://www.googleapis.com/auth/<name>'.
        parents: Optional list of parent ArgumentParser objects whose
            arguments are merged into the parser built here.

    Returns:
        A (service, flags) tuple: the built API service object and the
        parsed command-line arguments.
    """
    if scope is None:
        scope = 'https://www.googleapis.com/auth/' + name
    if doc is None:
        doc = __doc__
    # Parse command-line arguments, merging any caller-supplied parsers with
    # the oauth2client argparser (which contributes OAuth flags such as
    # --noauth_local_webserver). A fresh list is built on every call — the
    # original `parents=[]` mutable default was shared across calls.
    parent_parsers = [tools.argparser]
    parent_parsers.extend(parents or [])
    parser = argparse.ArgumentParser(
        description=doc,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=parent_parsers)
    flags = parser.parse_args(argv[1:])
    # client_secrets.json holds the OAuth 2.0 client_id and client_secret
    # obtained from the "API Access" tab of the Google APIs Console
    # (http://code.google.com/apis/console).
    if client_secrets is None:
        client_secrets = os.path.join(os.path.dirname(__file__), 'client_secrets.json')
    # Flow object used only if we need to (re)authenticate interactively.
    flow = client.flow_from_clientsecrets(
        client_secrets,
        scope=scope,
        message=tools.message_if_missing(client_secrets))
    # Cached credentials live in '<name>.dat'. If they are missing or
    # invalid, run the native client flow; Storage writes the refreshed
    # credentials back to the file on success.
    storage = file.Storage(name + '.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = tools.run_flow(flow, storage, flags)
    http = credentials.authorize(http=httplib2.Http())
    # Construct the service object via the discovery service.
    service = discovery.build(name, version, http=http)
    return (service, flags)
def get_top_pages(service, profile_id, start_date, end_data, filter='^/*', max_results=50):
    """Return the Google Analytics pageview report for the top page paths.

    Queries ga:pageviews by ga:pagePath, sorted most-viewed first, limited to
    paths matching the given regex filter.

    Args:
        service: An authorized Analytics API service object.
        profile_id: GA profile (view) id; prefixed with 'ga:' for the query.
        start_date: Report start date string, e.g. '2013-09-01'.
        end_data: Report end date string, e.g. '2013-09-05'. (Parameter name
            kept as-is for backward compatibility with keyword callers.)
        filter: Regex matched against ga:pagePath via the '=~' operator.
            (Name shadows the builtin but is part of the public interface.)
        max_results: Maximum number of rows to return.

    Returns:
        The executed API response as a dict; report rows are under 'rows'.
    """
    query_params = {
        'ids': 'ga:' + str(profile_id),
        'start_date': start_date,
        'end_date': end_data,
        'metrics': 'ga:pageviews',
        'dimensions': 'ga:pagePath',
        'sort': '-ga:pageviews',
        'filters': 'ga:pagePath=~' + filter,
        'start_index': '1',
        'max_results': max_results,
    }
    report_query = service.data().ga().get(**query_params)
    return report_query.execute()
if __name__ == '__main__':
    # Script-specific arguments. add_help=False because this parser is also
    # passed as a parent into get_service(), whose parser supplies --help.
    parser = argparse.ArgumentParser(description='List the top pages for a website.', add_help=False)
    parser.add_argument('-pid', '--profile_id', type=int, required=True)
    parser.add_argument('--start_date', required=True, help='2013-09-01')
    parser.add_argument('--end_date', required=True, help='2013-09-05')
    parser.add_argument('--filter', default='^/*', help='^/news/201*')
    parser.add_argument('--max_results', default=50, type=int)

    # --noauth_local_webserver belongs to the oauth2client parent parser, so
    # strip its first occurrence before parsing the script-only arguments
    # here; get_service() still receives the untouched sys.argv.
    cli_args = list(sys.argv)
    if '--noauth_local_webserver' in cli_args:
        cli_args.remove('--noauth_local_webserver')
    args = parser.parse_args(cli_args[1:])

    service, flags = get_service(sys.argv, parents=[parser])
    results = get_top_pages(
        service,
        args.profile_id,
        start_date=args.start_date,
        end_data=args.end_date,
        filter=args.filter,
        max_results=args.max_results)

    # Print each page path left-justified beside its pageview count.
    rows = results.get('rows', [])
    if rows:
        for row in rows:
            print('{0:<152}{1:>5}'.format(*row))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment