mirror of
https://github.com/arduino/arduino-ide.git
synced 2025-06-17 17:46:33 +00:00
[skip changelog] Add missing athena script
This commit is contained in:
parent
0eb2d25570
commit
ba177be41d
131
.github/tools/fetch_athena_stats.py
vendored
Normal file
131
.github/tools/fetch_athena_stats.py
vendored
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import boto3
|
||||||
|
import semver
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
|
||||||
|
log = logging.getLogger()
|
||||||
|
logging.getLogger("boto3").setLevel(logging.CRITICAL)
|
||||||
|
logging.getLogger("botocore").setLevel(logging.CRITICAL)
|
||||||
|
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
|
|
||||||
|
def execute(client, statement, dest_s3_output_location):
|
||||||
|
log.info("execute query: {} dumping in {}".format(statement, dest_s3_output_location))
|
||||||
|
result = client.start_query_execution(
|
||||||
|
QueryString=statement,
|
||||||
|
ClientRequestToken=str(uuid.uuid4()),
|
||||||
|
ResultConfiguration={
|
||||||
|
"OutputLocation": dest_s3_output_location,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
execution_id = result["QueryExecutionId"]
|
||||||
|
log.info("wait for query {} completion".format(execution_id))
|
||||||
|
wait_for_query_execution_completion(client, execution_id)
|
||||||
|
log.info("operation successful")
|
||||||
|
return execution_id
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_query_execution_completion(client, query_execution_id):
|
||||||
|
query_ended = False
|
||||||
|
while not query_ended:
|
||||||
|
query_execution = client.get_query_execution(QueryExecutionId=query_execution_id)
|
||||||
|
state = query_execution["QueryExecution"]["Status"]["State"]
|
||||||
|
if state == "SUCCEEDED":
|
||||||
|
query_ended = True
|
||||||
|
elif state in ["FAILED", "CANCELLED"]:
|
||||||
|
raise BaseException(
|
||||||
|
"query failed or canceled: {}".format(query_execution["QueryExecution"]["Status"]["StateChangeReason"])
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
def valid(key):
|
||||||
|
split = key.split("_")
|
||||||
|
if len(split) < 1:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
semver.parse(split[0])
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def get_results(client, execution_id):
|
||||||
|
results_paginator = client.get_paginator("get_query_results")
|
||||||
|
results_iter = results_paginator.paginate(QueryExecutionId=execution_id, PaginationConfig={"PageSize": 1000})
|
||||||
|
res = {}
|
||||||
|
for results_page in results_iter:
|
||||||
|
for row in results_page["ResultSet"]["Rows"][1:]:
|
||||||
|
# Loop through the JSON objects
|
||||||
|
key = row["Data"][0]["VarCharValue"]
|
||||||
|
if valid(key):
|
||||||
|
res[key] = row["Data"][1]["VarCharValue"]
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def convert_data(data):
|
||||||
|
result = []
|
||||||
|
for key, value in data.items():
|
||||||
|
# 0.18.0_macOS_64bit.tar.gz
|
||||||
|
split_key = key.split("_")
|
||||||
|
if len(split_key) != 3:
|
||||||
|
continue
|
||||||
|
(version, os_version, arch) = split_key
|
||||||
|
arch_split = arch.split(".")
|
||||||
|
if len(arch_split) < 1:
|
||||||
|
continue
|
||||||
|
arch = arch_split[0]
|
||||||
|
if len(arch) > 10:
|
||||||
|
# This can't be an architecture really.
|
||||||
|
# It's an ugly solution but works for now so deal with it.
|
||||||
|
continue
|
||||||
|
repo = os.environ["GITHUB_REPOSITORY"].split("/")[1]
|
||||||
|
result.append(
|
||||||
|
{
|
||||||
|
"type": "gauge",
|
||||||
|
"name": "arduino.downloads.total",
|
||||||
|
"value": value,
|
||||||
|
"host": os.environ["GITHUB_REPOSITORY"],
|
||||||
|
"tags": [
|
||||||
|
f"version:{version}",
|
||||||
|
f"os:{os_version}",
|
||||||
|
f"arch:{arch}",
|
||||||
|
"cdn:downloads.arduino.cc",
|
||||||
|
f"project:{repo}",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
DEST_S3_OUTPUT = os.environ["AWS_ATHENA_OUTPUT_LOCATION"]
|
||||||
|
AWS_ATHENA_SOURCE_TABLE = os.environ["AWS_ATHENA_SOURCE_TABLE"]
|
||||||
|
|
||||||
|
session = boto3.session.Session(region_name="us-east-1")
|
||||||
|
athena_client = session.client("athena")
|
||||||
|
|
||||||
|
# Load all partitions before querying downloads
|
||||||
|
execute(athena_client, f"MSCK REPAIR TABLE {AWS_ATHENA_SOURCE_TABLE};", DEST_S3_OUTPUT)
|
||||||
|
|
||||||
|
query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)),
|
||||||
|
'$.data.url'), 'https://downloads.arduino.cc/arduino-ide/arduino-ide_', '')
|
||||||
|
AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
|
||||||
|
FROM {AWS_ATHENA_SOURCE_TABLE}
|
||||||
|
WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
|
||||||
|
LIKE 'https://downloads.arduino.cc/arduino-ide/arduino-ide_%'
|
||||||
|
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
|
||||||
|
NOT LIKE '%latest%' -- exclude latest redirect
|
||||||
|
group by 1 ;"""
|
||||||
|
exec_id = execute(athena_client, query, DEST_S3_OUTPUT)
|
||||||
|
results = get_results(athena_client, exec_id)
|
||||||
|
result_json = convert_data(results)
|
||||||
|
|
||||||
|
print(f"::set-output name=result::{result_json}")
|
Loading…
x
Reference in New Issue
Block a user