mirror of
https://github.com/arduino/arduino-ide.git
synced 2025-07-15 07:16:38 +00:00
Remove broken download stats workflow
The "arduino-stats" GitHub Actions workflow was designed to periodically gather download statistics from Arduino CDN and push results to Datadog. The recorded stats from the identical system in the Arduino CLI repository showed a periodic decrease in total download count. Since this is patently impossible, it is clear that something is wrong with the system and that the recorded data is not trustworthy. An investigation into the problem was never done. On 2022-03-14, the runs of the "arduino-stats" GitHub Actions workflow began to fail. Because there had not been any relevant change in the repository between the last successful run and the first failing run, it seems that some external change caused the breakage. The workflow also uses deprecated Node.js 12 runtime-based actions and set-output workflow command, which currently results in warnings printed to the workflow run summary page, but will eventually cause the complete breakage of the workflow. Since the workflow was not ever working successfully and the lack of an investigation about that indicates that the stats are not of immediate importance, the best course of action is to simply remove the broken infrastructure from the repository rather than investing time into fixing something that isn't being used anyway.
This commit is contained in:
parent
c29452a858
commit
9cd03bec46
131
.github/tools/fetch_athena_stats.py
vendored
131
.github/tools/fetch_athena_stats.py
vendored
@ -1,131 +0,0 @@
|
||||
import boto3
|
||||
import semver
|
||||
import os
|
||||
import logging
|
||||
import uuid
|
||||
import time
|
||||
|
||||
|
||||
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
|
||||
log = logging.getLogger()
|
||||
logging.getLogger("boto3").setLevel(logging.CRITICAL)
|
||||
logging.getLogger("botocore").setLevel(logging.CRITICAL)
|
||||
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
|
||||
|
||||
|
||||
def execute(client, statement, dest_s3_output_location):
|
||||
log.info("execute query: {} dumping in {}".format(statement, dest_s3_output_location))
|
||||
result = client.start_query_execution(
|
||||
QueryString=statement,
|
||||
ClientRequestToken=str(uuid.uuid4()),
|
||||
ResultConfiguration={
|
||||
"OutputLocation": dest_s3_output_location,
|
||||
},
|
||||
)
|
||||
execution_id = result["QueryExecutionId"]
|
||||
log.info("wait for query {} completion".format(execution_id))
|
||||
wait_for_query_execution_completion(client, execution_id)
|
||||
log.info("operation successful")
|
||||
return execution_id
|
||||
|
||||
|
||||
def wait_for_query_execution_completion(client, query_execution_id):
|
||||
query_ended = False
|
||||
while not query_ended:
|
||||
query_execution = client.get_query_execution(QueryExecutionId=query_execution_id)
|
||||
state = query_execution["QueryExecution"]["Status"]["State"]
|
||||
if state == "SUCCEEDED":
|
||||
query_ended = True
|
||||
elif state in ["FAILED", "CANCELLED"]:
|
||||
raise BaseException(
|
||||
"query failed or canceled: {}".format(query_execution["QueryExecution"]["Status"]["StateChangeReason"])
|
||||
)
|
||||
else:
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def valid(key):
|
||||
split = key.split("_")
|
||||
if len(split) < 1:
|
||||
return False
|
||||
try:
|
||||
semver.parse(split[0])
|
||||
except ValueError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def get_results(client, execution_id):
|
||||
results_paginator = client.get_paginator("get_query_results")
|
||||
results_iter = results_paginator.paginate(QueryExecutionId=execution_id, PaginationConfig={"PageSize": 1000})
|
||||
res = {}
|
||||
for results_page in results_iter:
|
||||
for row in results_page["ResultSet"]["Rows"][1:]:
|
||||
# Loop through the JSON objects
|
||||
key = row["Data"][0]["VarCharValue"]
|
||||
if valid(key):
|
||||
res[key] = row["Data"][1]["VarCharValue"]
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def convert_data(data):
|
||||
result = []
|
||||
for key, value in data.items():
|
||||
# 0.18.0_macOS_64bit.tar.gz
|
||||
split_key = key.split("_")
|
||||
if len(split_key) != 3:
|
||||
continue
|
||||
(version, os_version, arch) = split_key
|
||||
arch_split = arch.split(".")
|
||||
if len(arch_split) < 1:
|
||||
continue
|
||||
arch = arch_split[0]
|
||||
if len(arch) > 10:
|
||||
# This can't be an architecture really.
|
||||
# It's an ugly solution but works for now so deal with it.
|
||||
continue
|
||||
repo = os.environ["GITHUB_REPOSITORY"].split("/")[1]
|
||||
result.append(
|
||||
{
|
||||
"type": "gauge",
|
||||
"name": "arduino.downloads.total",
|
||||
"value": value,
|
||||
"host": os.environ["GITHUB_REPOSITORY"],
|
||||
"tags": [
|
||||
f"version:{version}",
|
||||
f"os:{os_version}",
|
||||
f"arch:{arch}",
|
||||
"cdn:downloads.arduino.cc",
|
||||
f"project:{repo}",
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
DEST_S3_OUTPUT = os.environ["AWS_ATHENA_OUTPUT_LOCATION"]
|
||||
AWS_ATHENA_SOURCE_TABLE = os.environ["AWS_ATHENA_SOURCE_TABLE"]
|
||||
|
||||
session = boto3.session.Session(region_name="us-east-1")
|
||||
athena_client = session.client("athena")
|
||||
|
||||
# Load all partitions before querying downloads
|
||||
execute(athena_client, f"MSCK REPAIR TABLE {AWS_ATHENA_SOURCE_TABLE};", DEST_S3_OUTPUT)
|
||||
|
||||
query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)),
|
||||
'$.data.url'), 'https://downloads.arduino.cc/arduino-ide/arduino-ide_', '')
|
||||
AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
|
||||
FROM {AWS_ATHENA_SOURCE_TABLE}
|
||||
WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
|
||||
LIKE 'https://downloads.arduino.cc/arduino-ide/arduino-ide_%'
|
||||
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
|
||||
NOT LIKE '%latest%' -- exclude latest redirect
|
||||
group by 1 ;"""
|
||||
exec_id = execute(athena_client, query, DEST_S3_OUTPUT)
|
||||
results = get_results(athena_client, exec_id)
|
||||
result_json = convert_data(results)
|
||||
|
||||
print(f"::set-output name=result::{result_json}")
|
57
.github/workflows/arduino-stats.yml
vendored
57
.github/workflows/arduino-stats.yml
vendored
@ -1,57 +0,0 @@
|
||||
name: arduino-stats
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# run every day at 07:00 AM, 03:00 PM and 11:00 PM
|
||||
- cron: "0 7,15,23 * * *"
|
||||
workflow_dispatch:
|
||||
repository_dispatch:
|
||||
|
||||
jobs:
|
||||
push-stats:
|
||||
# This workflow is only of value to the arduino/arduino-ide repository and
|
||||
# would always fail in forks
|
||||
if: github.repository == 'arduino/arduino-ide'
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Fetch downloads count form Arduino CDN using AWS Athena
|
||||
id: fetch
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.STATS_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.STATS_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_ATHENA_SOURCE_TABLE: ${{ secrets.STATS_AWS_ATHENA_SOURCE_TABLE }}
|
||||
AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
run: |
|
||||
pip install boto3 semver
|
||||
python .github/tools/fetch_athena_stats.py
|
||||
|
||||
- name: Send metrics
|
||||
uses: masci/datadog@v1
|
||||
with:
|
||||
api-key: ${{ secrets.DD_API_KEY }}
|
||||
# Metrics input expects YAML but JSON will work just right.
|
||||
metrics: ${{steps.fetch.outputs.result}}
|
||||
|
||||
- name: Report failure
|
||||
if: failure()
|
||||
uses: masci/datadog@v1
|
||||
with:
|
||||
api-key: ${{ secrets.DD_API_KEY }}
|
||||
events: |
|
||||
- title: "Arduino IDE stats failing"
|
||||
text: "Stats collection failed"
|
||||
alert_type: "error"
|
||||
host: ${{ github.repository }}
|
||||
tags:
|
||||
- "project:arduino-ide"
|
||||
- "cdn:downloads.arduino.cc"
|
||||
- "workflow:${{ github.workflow }}"
|
Loading…
x
Reference in New Issue
Block a user