Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Analytics api #73

Merged
merged 17 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,26 @@ Request body:
}
```

#### Product Analytics Bulk Upload
Exports Google Analytics data gathered from the IDVA flow to Google Drive as a Google Sheets document. The routine then builds pivot tables so that the data is easy to read. By default, `POST /analytics` writes data for the previous day.

The ID of the destination Google Drive folder is configured by `ANALYTICS_ROOT` in `settings`.

Optionally, the user can pass in a date range to be uploaded. The data is collated into a single document, and the same pivot tables are written on the collated data.

`POST /analytics`
```
Query parameters: None
```
`POST /analytics` (with an optional date-range request body)
```JSON
// Request body
{
"startDate": "YYYY-MM-DD",
"endDate": "YYYY-MM-DD"
}
```


### Deploying to Cloud.gov during development
All deployments require having the correct Cloud.gov credentials in place. If
Expand Down
83 changes: 83 additions & 0 deletions gdrive/analytics_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Google Analytics Rest API
"""

from datetime import datetime, timedelta
import logging
from typing import Optional

import fastapi
from pydantic import BaseModel
from fastapi import responses
from gdrive import analytics_client, idva_flow_analytics

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Router that collects the analytics endpoints declared below.
router = fastapi.APIRouter()


class AnalyticsRequest(BaseModel):
    # Optional request body for POST /analytics describing a date range.
    # Both values arrive as strings; the route handler parses them with
    # analytics_client.API_DATE_FORMAT.
    startDate: str
    endDate: str


@router.post("/analytics")
async def run_analytics_default(req: Optional[AnalyticsRequest] = None):
    """
    Run the analytics report and describe the outcome in a JSON response.

    Without a request body the report covers the previous day only. With a
    body, ``startDate`` and ``endDate`` are parsed with
    ``analytics_client.API_DATE_FORMAT`` and the report covers that range.

    Returns a 422 response when either date cannot be parsed or when the
    start date falls after the end date; otherwise runs the report and
    returns a 202 response carrying a completion message.
    """
    if req is None:
        # No body supplied: report on yesterday only. The end date stays
        # None and is passed to run_analytics() unchanged.
        start = datetime.today() - timedelta(days=1)
        end = None
        message = "Analytics report for %s complete." % (start.date())
    else:
        # Only the parsing calls can raise ValueError, so keep the try small.
        try:
            start = datetime.strptime(req.startDate, analytics_client.API_DATE_FORMAT)
            end = datetime.strptime(req.endDate, analytics_client.API_DATE_FORMAT)
        except ValueError as err:
            # @suppress("py/stack-trace-exposure")
            return responses.JSONResponse(
                status_code=422,
                content="Failed (invalid date parameters): %s" % (err),
            )

        # Reject a reversed range here: run_analytics() swallows downstream
        # errors, so the caller would otherwise still be told "complete".
        if start > end:
            return responses.JSONResponse(
                status_code=422,
                content="Failed (invalid date parameters): startDate %s is after endDate %s"
                % (start.date(), end.date()),
            )

        message = "Analytics report for %s - %s complete." % (
            start.date(),
            end.date(),
        )

    run_analytics(start, end)
    return responses.JSONResponse(
        status_code=202,
        content=message,
    )


@router.post("/analytics/list")
async def list_accounts():
    # Route handler for POST /analytics/list.
    #
    # NOTE(review): this coroutine shares its name with the synchronous
    # list_accounts() helper defined later in this module. That later `def`
    # rebinds the module-level name, so the call below resolves to the
    # helper instead of recursing into this coroutine. The behaviour depends
    # on definition order; renaming one of the two would make it explicit.
    list_accounts()
    return responses.JSONResponse(
        status_code=202, content="List request is being processed."
    )


def run_analytics(start_date: datetime, end_date: Optional[datetime]):
    """
    Build the IDVA flow analytics report for the given dates.

    Both arguments are forwarded unchanged to
    ``idva_flow_analytics.create_report``. ``end_date`` may be None; the
    default route passes None when only a single day is requested.

    This is best-effort: any exception is logged and not re-raised, so the
    caller cannot tell from the return value whether the report succeeded.
    """
    try:
        idva_flow_analytics.create_report(start_date, end_date)
    except Exception:
        # log.exception records the traceback, which log.error(e) dropped.
        log.exception(
            "Analytics report failed (start=%s, end=%s)", start_date, end_date
        )


def list_accounts():
    """
    Log the name and display name of every account in the list response.

    Calls ``analytics_client.list()`` and writes one log entry per field,
    followed by a separator line, for each entry in ``.accounts``. When the
    response is None a warning about credentials and access is logged
    instead.

    This is best-effort: any exception is logged and not re-raised.
    """
    try:
        list_response = analytics_client.list()
        if list_response is None:
            # Logger.warn is a deprecated alias; use warning().
            log.warning(
                "List response was none. Ensure credentials are set correctly"
                " and you have access to the cloud property."
            )
            return

        log.info("-------------------------------")
        for act in list_response.accounts:
            # Arguments are passed lazily so formatting happens only if the
            # record is actually emitted.
            log.info("Name:\t\t%s", act.name)
            log.info("Display name:\t%s", act.display_name)
            log.info("-------------------------------")
    except Exception:
        # Keep the traceback; the original logged only the exception args.
        log.exception("Failed to list Google Analytics accounts")
109 changes: 109 additions & 0 deletions gdrive/analytics_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import datetime

from google.oauth2 import service_account
from google.analytics.admin import AnalyticsAdminServiceClient
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
DateRange,
Dimension,
Metric,
RunReportRequest,
RunReportResponse,
)

import logging
import pandas as pd

from gdrive import settings

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Service-account credentials are built once, when this module is imported,
# from settings.CREDENTIALS. No explicit scopes are passed here.
creds = service_account.Credentials.from_service_account_info(settings.CREDENTIALS)
# strftime/strptime pattern (YYYY-MM-DD) used by format_date_for_api() below.
API_DATE_FORMAT = "%Y-%m-%d"

"""
Client for the Google Analytics (GA4) API

This class contains functions relating to downloading analytics data
for the IDVA flow.
"""


def download(
    property_id, target_date: datetime.date, end_date: "datetime.date | None" = None
) -> RunReportResponse:
    """
    Access Google Analytics (GA4) api and download desired analytics report.

    Args:
        property_id: GA4 property identifier; interpolated into
            ``properties/{property_id}``.
        target_date: first day of the report (a date or datetime).
        end_date: last day of the report. When None, the report covers
            ``target_date`` only.

    Returns:
        The response of ``BetaAnalyticsDataClient.run_report`` for the
        request built here.
    """
    if end_date is None:
        end_date = target_date

    # https://developers.google.com/analytics/devguides/reporting/data/v1/api-schema
    dimension_names = (
        "eventName",
        "firstUserCampaignName",
        "firstUserMedium",
        "firstUserSource",
        "isConversionEvent",
        "linkUrl",
    )
    metric_names = (
        "eventCount",
        "sessions",
        "totalUsers",
        "eventCountPerUser",
        "conversions",
    )

    request = RunReportRequest(
        property=f"properties/{property_id}",
        # NOTE(review): limit is passed as a string, as in the original code;
        # confirm the client library coerces it to an integer.
        limit="250",
        dimensions=[Dimension(name=name) for name in dimension_names],
        metrics=[Metric(name=name) for name in metric_names],
        date_ranges=[
            DateRange(
                start_date=format_date_for_api(target_date),
                end_date=format_date_for_api(end_date),
            )
        ],
    )

    # A new client is created for every call, using the module credentials.
    return BetaAnalyticsDataClient(credentials=creds).run_report(request)


def list():
    """
    Return the result of ``list_accounts()`` on a Google Analytics admin
    client built from the module credentials.

    Can be run to verify that the environment is set up correctly.
    """
    return AnalyticsAdminServiceClient(credentials=creds).list_accounts()


def format_date_for_api(date: datetime.date) -> str:
    """
    Formats a date or datetime object for Google Analytics Api (GA4) input.

    The value is rendered with ``API_DATE_FORMAT``. The annotation is
    ``datetime.date`` because this module imports the ``datetime`` module,
    not the class, and only ``strftime`` is called on the argument.
    """
    return date.strftime(API_DATE_FORMAT)


def create_df_from_analytics_response(response: RunReportResponse):
    """
    Extracts values from Google Analytics API response and transforms
    them into pandas DataFrame for ease of use. This enables the analytics
    client to do any processing of the data desired, if something comes up in
    the future we want to do but isn't supported in GA4.

    The header names (dimensions first, then metrics) become the FIRST ROW
    of the frame, not its column labels; the columns keep pandas' default
    integer labels. Each later row holds that row's dimension values
    followed by its metric values.
    """
    header_row = [header.name for header in response.dimension_headers]
    header_row += [header.name for header in response.metric_headers]

    table = [header_row]
    for row in response.rows:
        table.append(
            [cell.value for cell in row.dimension_values]
            + [cell.value for cell in row.metric_values]
        )

    return pd.DataFrame(table)
16 changes: 9 additions & 7 deletions gdrive/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
from googleapiclient.http import HttpError
from starlette.requests import Request

from . import client, settings
from . import drive_client, settings

log = logging.getLogger(__name__)

router = fastapi.APIRouter()

client.init()
drive_client.init()


# Patch zip decodeExtra to ignore invalid extra data
Expand Down Expand Up @@ -50,16 +50,18 @@ async def upload_file(

stream = io.BytesIO(body)

parent = client.create_folder(id, settings.ROOT_DIRECTORY)
parent = drive_client.create_folder(id, settings.ROOT_DIRECTORY)

if zip:
with zipfile.ZipFile(stream) as archive:
files = archive.filelist
for file in files:
image = io.BytesIO(archive.read(file))
client.upload_basic(f"{filename}_{file.filename}", parent, image)
drive_client.upload_basic(
f"{filename}_{file.filename}", parent, image
)
else:
client.upload_basic(filename, parent, stream)
drive_client.upload_basic(filename, parent, stream)

except HttpError as error:
log.error(f"An error occurred: {error}")
Expand All @@ -73,10 +75,10 @@ async def delete_file(filename, response: Response):
"""

try:
files = client.get_files(filename)
files = drive_client.get_files(filename)
if files:
for file in files:
client.delete_file(file["id"])
drive_client.delete_file(file["id"])
else:
response.status_code = status.HTTP_404_NOT_FOUND

Expand Down
69 changes: 17 additions & 52 deletions gdrive/client.py → gdrive/drive_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
creds = service_account.Credentials.from_service_account_info(
settings.CREDENTIALS, scopes=settings.SCOPES
)

service = build("drive", "v3", credentials=creds)
sheets_service = build("sheets", "v4", credentials=creds)


def init():
Expand Down Expand Up @@ -62,6 +62,22 @@ def list(count: int = 10, shared: bool = True) -> None:
log.info(f"No such key: {error} in {item}")


def create_empty_spreadsheet(filename: str, parent_id: str) -> str:
    """
    Create an empty Google Sheets file named ``filename`` under the folder
    ``parent_id`` and return the ``id`` field of the create response.
    """
    spreadsheet_metadata = {
        "name": filename,
        "parents": [parent_id],
        "mimeType": "application/vnd.google-apps.spreadsheet",
    }

    # Only the id field is requested back from the API.
    create_request = service.files().create(
        body=spreadsheet_metadata, fields="id", supportsAllDrives=True
    )
    created = create_request.execute()

    return created.get("id")


def drives_list():
"""
List available shared drives
Expand Down Expand Up @@ -164,54 +180,3 @@ def delete_file(id: str) -> None:
"""

service.files().delete(fileId=id, supportsAllDrives=True).execute()


def upload_participant(
    first,
    last,
    email,
    responseId,
    time,
    date,
    ethnicity,
    race,
    gender,
    age,
    income,
    skin_tone,
):
    """
    Append participant data to spreadsheet

    Writes a single row to ``Sheet1`` of the spreadsheet identified by
    ``settings.SHEETS_ID`` and returns the API result. Raises
    ``error.ExportError`` when the result contains an ``"error"`` entry.

    NOTE(review): ``age`` is accepted but is not part of the written row;
    confirm whether that is intentional.
    """
    full_name = first + " " + last
    row = [
        first,
        last,
        full_name,
        email,
        responseId,
        time,
        date,
        ethnicity,
        race,
        gender,
        income,
        skin_tone,
    ]

    append_request = (
        sheets_service.spreadsheets()
        .values()
        .append(
            spreadsheetId=settings.SHEETS_ID,
            range="Sheet1!A1",
            valueInputOption="RAW",
            body={"values": [row]},
        )
    )
    outcome = append_request.execute()

    if "error" in outcome:
        raise error.ExportError(outcome["error"]["message"])
    return outcome
8 changes: 4 additions & 4 deletions gdrive/export_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pydantic import BaseModel
from fastapi import BackgroundTasks, responses

from gdrive import export_client, client, settings, error
from gdrive import export_client, drive_client, settings, error

log = logging.getLogger(__name__)

Expand All @@ -23,8 +23,8 @@ async def upload_file(interactionId):
export_bytes = io.BytesIO(
export_client.codename(json.dumps(export_data, indent=2)).encode()
)
parent = client.create_folder(interactionId, settings.ROOT_DIRECTORY)
client.upload_basic("analytics.json", parent, export_bytes)
parent = drive_client.create_folder(interactionId, settings.ROOT_DIRECTORY)
drive_client.upload_basic("analytics.json", parent, export_bytes)


class ParticipantModel(BaseModel):
Expand Down Expand Up @@ -85,7 +85,7 @@ async def survey_upload_response_task(request):

if request.participant:
participant = request.participant
client.upload_participant(
drive_client.upload_participant(
participant.first,
participant.last,
participant.email,
Expand Down
Loading