Last active
December 22, 2015 10:09
-
-
Save epicserve/6456150 to your computer and use it in GitHub Desktop.
List the top pages for a website using Google Analytics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| List the top pages for a website using Google Analytics. | |
| Installation | |
| ------------ | |
| 1. Install the required python library:: | |
| pip install google-api-python-client==1.2 | |
| 2. Go to https://code.google.com/apis/console/ to get your API credentials. | |
| - Click create project | |
| - Click the on/off button next to "Analytics API" to turn on | |
| "Analytics API" service. | |
| - Click on the "API Access" tab | |
| - Click the "Create an OAuth 2.0 client ID" button | |
| - Add a "Product name" and "Home Page URL" and then click next | |
| - Select "Installed application" | |
| - Click "Create client ID" | |
| 3. Download client_secrets.json and put it in the same directory as this script | |
| Usage | |
| ----- | |
| Before you run the script you'll need to get the profile id for the website | |
| you want to get top pages for. Login to Google Analytics and then view the | |
| analytics for the website you want the top pages for. The profile ID will be | |
| the number after the "p" in the URL. | |
| If the URL was | |
| https://www.google.com/analytics/web/?hl=en&pli=1#report/visitors-overview/a1212121w23232323p34343434/, | |
| then your profile ID would be 34343434. | |
| Run the script:: | |
| python list_ga_top_pages.py --profile_id 34343434 --start_date '2013-09-01' \ | |
| --end_date '2013-09-05' --filter '^/news/201*' --max_results 20 | |
| """ | |
| from apiclient import discovery | |
| from oauth2client import client | |
| from oauth2client import file | |
| from oauth2client import tools | |
| import sys | |
| import argparse | |
| import httplib2 | |
| import os | |
| __author__ = "epicserve@gmail.com (Brent O'Connor)" | |
def get_service(argv, name='analytics', version='v3', doc=None,
                client_secrets=None,
                scope='https://www.googleapis.com/auth/analytics.readonly',
                parents=None):
    """Parse command-line flags, authenticate with OAuth 2.0 and build a service.

    Args:
        argv: Full argument vector with the program name first; only
            ``argv[1:]`` is parsed.
        name: API name handed to ``discovery.build``. It is also used to
            name the credential storage file (``name + '.dat'``) and, when
            ``scope`` is None, to derive the OAuth scope URL.
        version: API version handed to ``discovery.build``.
        doc: Description for the argument parser's help output. Defaults to
            this module's docstring.
        client_secrets: Path to the OAuth 2.0 client secrets file. Defaults
            to ``client_secrets.json`` in the directory of this script.
        scope: OAuth scope to request. Pass None to derive it from ``name``.
        parents: Optional iterable of ``argparse`` parsers whose arguments
            are accepted in addition to those of ``tools.argparser``.

    Returns:
        A ``(service, flags)`` tuple: the object returned by
        ``discovery.build`` and the namespace of parsed command-line flags.
    """
    if scope is None:
        scope = 'https://www.googleapis.com/auth/' + name
    if doc is None:
        doc = __doc__

    # Parse command-line arguments. ``parents`` defaults to None rather than
    # a shared mutable list, so the default can never leak between calls.
    parent_parsers = [tools.argparser]
    parent_parsers.extend(parents or [])
    parser = argparse.ArgumentParser(
        description=doc,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=parent_parsers)
    flags = parser.parse_args(argv[1:])

    # Name of a file containing the OAuth 2.0 information for this
    # application, including client_id and client_secret, which are found
    # on the API Access tab on the Google APIs
    # Console <http://code.google.com/apis/console>.
    if client_secrets is None:
        client_secrets = os.path.join(os.path.dirname(__file__), 'client_secrets.json')

    # Set up a Flow object to be used if we need to authenticate.
    flow = client.flow_from_clientsecrets(
        client_secrets,
        scope=scope,
        message=tools.message_if_missing(client_secrets))

    # Prepare credentials, and authorize the HTTP object with them.
    # If the credentials don't exist or are invalid, run through the native
    # client flow. The Storage object is handed to run_flow so that the new
    # credentials can be written back to the file.
    storage = file.Storage(name + '.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = tools.run_flow(flow, storage, flags)
    http = credentials.authorize(http=httplib2.Http())

    # Construct a service object via the discovery service.
    service = discovery.build(name, version, http=http)
    return (service, flags)
def get_top_pages(service, profile_id, start_date, end_data, filter='^/*', max_results=50):
    """Fetch page paths ordered by descending pageviews for one profile.

    Args:
        service: Object exposing ``data().ga().get(**params).execute()``.
        profile_id: Profile (view) ID; it is sent as ``'ga:<profile_id>'``.
        start_date: Value sent as the ``start_date`` query parameter.
        end_data: Value sent as the ``end_date`` query parameter.
        filter: Regular expression matched against ``ga:pagePath``.
        max_results: Value sent as the ``max_results`` query parameter.

    Returns:
        Whatever ``execute()`` returns for the built request.
    """
    query_params = {
        'ids': 'ga:%s' % profile_id,
        'start_date': start_date,
        'end_date': end_data,
        'metrics': 'ga:pageviews',
        'dimensions': 'ga:pagePath',
        # Leading '-' requests descending order: most viewed pages first.
        'sort': '-ga:pageviews',
        # '=~' is the regular-expression match operator for filters.
        'filters': 'ga:pagePath=~' + filter,
        'start_index': '1',
        'max_results': max_results,
    }
    request = service.data().ga().get(**query_params)
    return request.execute()
def main(argv=None):
    """Command-line entry point: print the top pages with their pageviews.

    Args:
        argv: Full argument vector with the program name first. Defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    # add_help=False because this parser is only used as a parent of the
    # parser built inside get_service(), which supplies -h/--help itself.
    parser = argparse.ArgumentParser(description='List the top pages for a website.', add_help=False)
    parser.add_argument('-pid', '--profile_id', type=int, required=True)
    parser.add_argument('--start_date', required=True, help='2013-09-01')
    parser.add_argument('--end_date', required=True, help='2013-09-05')
    parser.add_argument('--filter', default='^/*', help='^/news/201*')
    parser.add_argument('--max_results', default=50, type=int)

    # get_service() parses argv exactly once with this parser plus the
    # oauth2client parser as parents, so the returned flags carry both the
    # script's options and every OAuth flag. Parsing separately here would
    # reject any OAuth flag that was not stripped by hand first.
    service, flags = get_service(argv, parents=[parser])

    results = get_top_pages(
        service,
        flags.profile_id,
        start_date=flags.start_date,
        end_data=flags.end_date,
        filter=flags.filter,
        max_results=flags.max_results)

    # 'rows' may be missing (or empty) in the response; print nothing then.
    for row in results.get('rows') or []:
        print('{0:<152}{1:>5}'.format(*row))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment