From df7ad5f91c0a32356e7648f88091e55e04a6f8a1 Mon Sep 17 00:00:00 2001
From: Juanli Shen
Date: Mon, 19 Aug 2019 14:58:01 -0700
Subject: [PATCH] Repo stats tracking

---
 tools/gcp/github_stats_tracking/app.yaml      |  15 +++
 .../github_stats_tracking/appengine_config.py |  19 +++
 tools/gcp/github_stats_tracking/cron.yaml     |   4 +
 tools/gcp/github_stats_tracking/fetch_data.py | 132 ++++++++++++++++++
 tools/gcp/github_stats_tracking/main.py       |  32 +++++
 5 files changed, 202 insertions(+)
 create mode 100644 tools/gcp/github_stats_tracking/app.yaml
 create mode 100644 tools/gcp/github_stats_tracking/appengine_config.py
 create mode 100644 tools/gcp/github_stats_tracking/cron.yaml
 create mode 100644 tools/gcp/github_stats_tracking/fetch_data.py
 create mode 100644 tools/gcp/github_stats_tracking/main.py

diff --git a/tools/gcp/github_stats_tracking/app.yaml b/tools/gcp/github_stats_tracking/app.yaml
new file mode 100644
index 00000000000..b0fa5573649
--- /dev/null
+++ b/tools/gcp/github_stats_tracking/app.yaml
@@ -0,0 +1,15 @@
+runtime: python27
+api_version: 1
+threadsafe: true
+
+service: github-stats-tracking
+
+handlers:
+- url: /.*
+  script: main.app
+  # Restrict access to the cron service and project administrators.
+  login: admin
+
+libraries:
+- name: ssl
+  version: latest
diff --git a/tools/gcp/github_stats_tracking/appengine_config.py b/tools/gcp/github_stats_tracking/appengine_config.py
new file mode 100644
index 00000000000..086be2aefff
--- /dev/null
+++ b/tools/gcp/github_stats_tracking/appengine_config.py
@@ -0,0 +1,19 @@
+# Copyright 2019 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# appengine_config.py
+from google.appengine.ext import vendor
+
+# Add any libraries installed in the "lib" folder.
+vendor.add('lib')
diff --git a/tools/gcp/github_stats_tracking/cron.yaml b/tools/gcp/github_stats_tracking/cron.yaml
new file mode 100644
index 00000000000..b5b36be92c6
--- /dev/null
+++ b/tools/gcp/github_stats_tracking/cron.yaml
@@ -0,0 +1,4 @@
+cron:
+- description: "daily github stats tracking job"
+  url: /daily
+  schedule: every 24 hours
diff --git a/tools/gcp/github_stats_tracking/fetch_data.py b/tools/gcp/github_stats_tracking/fetch_data.py
new file mode 100644
index 00000000000..ed183a15a25
--- /dev/null
+++ b/tools/gcp/github_stats_tracking/fetch_data.py
@@ -0,0 +1,132 @@
+# Copyright 2019 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from datetime import datetime, timedelta
+from time import time
+
+from github import Github, Label
+from google.cloud import bigquery
+
+# Please set the access token properly before deploying.
+ACCESS_TOKEN = ""
+
+# An issue counts as triaged once it has a label of each of these kinds.
+_KEY_LABEL_KINDS = frozenset(('lang', 'kind', 'priority'))
+_PRIORITY_LABEL_NAMES = ('priority/P0', 'priority/P1', 'priority/P2')
+
+
+def _label_names(issue):
+    """Returns the set of label names attached to the issue."""
+    return set(label.name for label in issue.labels)
+
+
+def _is_untriaged(issue):
+    """Returns True if the issue lacks a lang, kind or priority label."""
+    kinds = set(name.split('/')[0] for name in _label_names(issue))
+    return not _KEY_LABEL_KINDS.issubset(kinds)
+
+
+def get_stats_from_github():
+    """Collects triage and bug statistics for the grpc/grpc repository.
+
+    Returns:
+      A tuple (untriaged_count, lang_to_stats). untriaged_count is the number
+      of open issues (pull requests excluded) that lack a lang, kind or
+      priority label. lang_to_stats maps each lang/* label object to the list
+      [total, closed, open, open P0, open P1, open P2] of kind/bug counts.
+
+    Raises:
+      ValueError: if ACCESS_TOKEN has not been set.
+    """
+    # An assert is stripped under -O, so validate the token explicitly.
+    if not ACCESS_TOKEN:
+        raise ValueError('ACCESS_TOKEN must be set before deploying.')
+    g = Github(ACCESS_TOKEN)
+    remaining, limit = g.rate_limiting
+    logging.info('GitHub API requests remaining: %s of %s', remaining, limit)
+    repo = g.get_repo('grpc/grpc')
+
+    lang_labels = [
+        label for label in repo.get_labels()
+        if label.name.split('/')[0] == 'lang'
+    ]
+    kind_bug_label = repo.get_label('kind/bug')
+
+    untriaged_open_issues = sum(
+        1 for issue in repo.get_issues(state='open')
+        if issue.pull_request is None and _is_untriaged(issue))
+
+    # Snapshot (state, label names) once per bug; labels are matched by name
+    # so the counts do not depend on how the client compares Label objects.
+    bugs = [(issue.state, _label_names(issue))
+            for issue in repo.get_issues(state='all', labels=[kind_bug_label])
+            if issue.pull_request is None]
+
+    lang_to_stats = {}
+    for lang in lang_labels:
+        lang_bugs = [(state, names) for state, names in bugs
+                     if lang.name in names]
+        open_bugs = [names for state, names in lang_bugs if state == 'open']
+        closed_count = sum(1 for state, _ in lang_bugs if state == 'closed')
+        priority_counts = [
+            sum(1 for names in open_bugs if priority in names)
+            for priority in _PRIORITY_LABEL_NAMES
+        ]
+        lang_to_stats[lang] = (
+            [len(lang_bugs), closed_count, len(open_bugs)] + priority_counts)
+    return untriaged_open_issues, lang_to_stats
+
+
+def insert_stats_to_db(untriaged_open_issues, lang_to_stats):
+    """Writes one timestamped snapshot of the stats to BigQuery.
+
+    Args:
+      untriaged_open_issues: number of untriaged open issues.
+      lang_to_stats: dict mapping a lang/* label object to its list of bug
+        counts, as returned by get_stats_from_github().
+
+    Raises:
+      RuntimeError: if BigQuery reports insert errors for any table.
+    """
+    timestamp = time()
+    client = bigquery.Client()
+    dataset_ref = client.dataset('github_issues')
+    # Strip the "lang/" prefix so only the bare language name is stored.
+    bug_rows = [(timestamp, lang.name[len('lang/'):]) + tuple(stats)
+                for lang, stats in lang_to_stats.items()]
+    pending = (
+        ('untriaged_issues', [(timestamp, untriaged_open_issues)]),
+        ('bug_stats', bug_rows),
+    )
+
+    failed_tables = []
+    for table_name, rows in pending:
+        if not rows:
+            continue  # Nothing to write, e.g. no lang/* labels were found.
+        table = client.get_table(dataset_ref.table(table_name))
+        errors = client.insert_rows(table, rows)
+        if errors:
+            logging.error('BigQuery rejected rows for %s: %s', table_name,
+                          errors)
+            failed_tables.append(table_name)
+    if failed_tables:
+        raise RuntimeError(
+            'Failed to insert stats into: %s' % ', '.join(failed_tables))
+
+
+def fetch():
+    """Fetches the current stats from GitHub and stores them in BigQuery."""
+    untriaged_open_issues, lang_to_stats = get_stats_from_github()
+    insert_stats_to_db(untriaged_open_issues, lang_to_stats)
diff --git a/tools/gcp/github_stats_tracking/main.py b/tools/gcp/github_stats_tracking/main.py
new file mode 100644
index 00000000000..f1e7ca6d981
--- /dev/null
+++ b/tools/gcp/github_stats_tracking/main.py
@@ -0,0 +1,32 @@
+# Copyright 2019 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import webapp2
+from fetch_data import fetch
+
+
+class DailyCron(webapp2.RequestHandler):
+    """Handles the /daily request scheduled in cron.yaml."""
+
+    def get(self):
+        """Runs one stats collection pass and replies 204 No Content."""
+        fetch()
+        self.response.status = 204
+
+
+# debug=False keeps stack traces out of HTTP responses.
+app = webapp2.WSGIApplication(
+    [
+        ('/daily', DailyCron),
+    ], debug=False)