#!/usr/bin/env bash

# This script performs the following:
# 1. Run the query, use jq to extract the QueryExecutionId, and capture it in
#    a bash variable.
# 2. Wait for the query to finish (up to 120 polls, 2 seconds apart, i.e. about
#    240 seconds).
# 3. Get the results.
# 4. Build the JSON data points structure.

# Expected env variables are:
#   AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for accessing AWS resources
#   AWS_ATHENA_SOURCE_TABLE
#   AWS_ATHENA_OUTPUT_LOCATION
#   GITHUB_REPOSITORY

set -euo pipefail

# Polling budget for the MSCK REPAIR TABLE query: maxPolls * pollIntervalSeconds
# is the approximate maximum wait (time spent in the AWS calls comes on top).
maxPolls=120
pollIntervalSeconds=2

# Start the MSCK REPAIR TABLE query on the source table and keep its execution
# id so the state can be polled below. Under pipefail a failing `aws` call
# aborts the script even though jq is the last pipeline stage.
loadExecutionId=$(
  aws athena start-query-execution \
    --query-string "MSCK REPAIR TABLE ${AWS_ATHENA_SOURCE_TABLE};" \
    --result-configuration "OutputLocation=${AWS_ATHENA_OUTPUT_LOCATION}" \
    --region us-east-1 \
    | jq -r ".QueryExecutionId"
)

echo "QueryExecutionId is ${loadExecutionId}"

# Poll the query state until it reaches SUCCEEDED, hits a terminal failure
# state, or the polling budget is used up.
loadState=""
for ((attempt = 1; attempt <= maxPolls; attempt++)); do
  loadState=$(
    aws athena get-query-execution \
      --query-execution-id "${loadExecutionId}" \
      --region us-east-1 \
      | jq -r ".QueryExecution.Status.State"
  )

  if [[ "${loadState}" == "SUCCEEDED" ]]; then
    break
  fi

  echo "QueryExecutionId ${loadExecutionId} - state is ${loadState}"

  # FAILED and CANCELLED are both terminal: the query will never reach
  # SUCCEEDED from either, so stop instead of polling until the timeout.
  if [[ "${loadState}" == "FAILED" || "${loadState}" == "CANCELLED" ]]; then
    exit 1
  fi

  sleep "${pollIntervalSeconds}"
done

# The loop also ends when the polling budget runs out. Do not carry on as if
# the repair had completed in that case.
if [[ "${loadState}" != "SUCCEEDED" ]]; then
  echo "QueryExecutionId ${loadExecutionId} did not reach SUCCEEDED after" \
    "$((maxPolls * pollIntervalSeconds)) seconds (last state: ${loadState})" >&2
  exit 1
fi

# NOTE(review): the reviewed text is cut off at this point, partway through the
# statement
#   ! read -r -d '' query <
# which appears to begin a here-document that fills the `query` variable
# (the leading "!" presumably keeps read's non-zero end-of-input status from
# tripping `set -e`). The remainder of the script is not visible here and must
# be restored from the full script - TODO confirm.