Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
jupyter-naas
GitHub Repository: jupyter-naas/awesome-notebooks
Path: blob/master/GitHub/GitHub_Create_leaderboard_of_contributors.ipynb
2973 views
Kernel: Python 3

GitHub.png

GitHub - Create leaderboard of contributors

Give Feedback | Bug report

Tags: #github #repos #commits #stats #naas_drivers #leaderboard #commitsPoints

Author: Suhas B

Last update: 2023-04-12 (Created: 2023-02-01)

Input

Import libraries

import pandas as pd import plotly.express as px from naas_drivers import github import naas import requests from urllib.parse import urlencode

Set up GitHub

How to find your personal access token on GitHub?

# URL of the GitHub repository whose contributors will be ranked.
REPO_URL = "https://github.com/jupyter-naas/awesome-notebooks"

# GitHub personal access token, read from the naas secret named "Git_Token".
GITHUB_TOKEN = naas.secret.get("Git_Token")

Model

Get actor/user who closed the issue

def get_actor_for_closed_issue(events_url, git_obj):
    """Return the login of the user who most recently closed an issue.

    Every page of the issue's event list is read, and the actor of the last
    ``"closed"`` event encountered wins, so an issue that was closed,
    reopened and closed again reports its latest closer.

    Parameters
    ----------
    events_url: str
        ``events_url`` of an issue as returned by the GitHub API.
        Example: "https://api.github.com/repos/jupyter-naas/awesome-notebooks/issues/1401/events"
    git_obj:
        Object whose ``headers`` attribute is sent with each request
        (the notebook passes the object returned by ``github.connect``).

    Returns
    -------
    str or None
        Login of the closing user. ``None`` when there is no "closed" event
        or when the latest "closed" event carries no actor.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    actor = None
    url = events_url
    # Ask for the largest page size on the first request only; the "next"
    # link returned by the API already carries its own query string.
    params = {"per_page": 100}
    while url:
        res = requests.get(url, headers=git_obj.headers, params=params, timeout=30)
        res.raise_for_status()
        for event in res.json():
            if event.get("event") == "closed":
                # "actor" may be null in the payload, so guard before
                # reading its login instead of indexing into None.
                actor = (event.get("actor") or {}).get("login")
        url = res.links.get("next", {}).get("url")
        params = None
    return actor

Get All Issues (both open and closed issues)

def get_all_issues(repo_url):
    """Fetch every issue (open and closed) of a repository as a DataFrame.

    Pull requests, which the issues endpoint also returns, are skipped: they
    are recognised by the ``pull_request`` key that only they carry.

    The returned dataframe has one row per issue and these columns:
        link_to_the_issue
        issue_number
        issue_title
        issue_state
        issue_creator
        issue_closed_by   (None while the issue is open)
        last_created_date
        last_created_time
        last_updated_date
        last_updated_time

    The columns are present even when the repository has no issues.

    Parameters
    ----------
    repo_url: str
        Repository url from Github.
        Example: "https://github.com/jupyter-naas/awesome-notebooks"

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    columns = [
        "link_to_the_issue",
        "issue_number",
        "issue_title",
        "issue_state",
        "issue_creator",
        "issue_closed_by",
        "last_created_date",
        "last_created_time",
        "last_updated_date",
        "last_updated_time",
    ]

    def split_timestamp(stamp):
        # "2023-04-12T10:20:30Z" -> ("2023-04-12", "10:20:30")
        parts = stamp.strip("Z").split("T")
        return parts[0], parts[-1]

    git_obj = github.connect(GITHUB_TOKEN)
    # NOTE(review): the value is interpolated as the "owner/name" path of the
    # API URL; confirm that is what get_repository_url returns.
    repository = git_obj.get_repository_url(repo_url)
    url = f"https://api.github.com/repos/{repository}/issues"

    # Rows are collected in a list and the frame is built once at the end;
    # growing a DataFrame cell by cell is quadratic and turns ints into floats.
    records = []
    page = 1
    while True:
        params = {"state": "all", "per_page": 100, "page": page}
        res = requests.get(url, headers=git_obj.headers, params=params, timeout=30)
        res.raise_for_status()
        issues = res.json()
        if not issues:
            break
        for issue in issues:
            if "pull_request" in issue:
                continue
            created_date, created_time = split_timestamp(issue["created_at"])
            updated_date, updated_time = split_timestamp(issue["updated_at"])
            # Only closed issues have a closer; skipping the lookup for open
            # ones saves one API call per open issue.
            closed_by = None
            if issue["state"] == "closed":
                closed_by = get_actor_for_closed_issue(issue["events_url"], git_obj)
            records.append(
                {
                    "link_to_the_issue": issue["html_url"],
                    "issue_number": issue["number"],
                    "issue_title": issue["title"],
                    "issue_state": issue["state"],
                    # "user" may be null in the payload.
                    "issue_creator": (issue.get("user") or {}).get("login"),
                    "issue_closed_by": closed_by,
                    "last_created_date": created_date,
                    "last_created_time": created_time,
                    "last_updated_date": updated_date,
                    "last_updated_time": updated_time,
                }
            )
        page += 1
    return pd.DataFrame(records, columns=columns)

Get Issue/PR comments

def get_issue_pr_comments(repo_url):
    """Fetch all issue and pull-request comments of a repository.

    Reads the repository-wide ``issues/comments`` endpoint page by page.

    The returned dataframe has one row per comment and these columns:
        comment_id
        issue_url
        comment_by
        comment_body

    The columns are present even when the repository has no comments.

    Parameters
    ----------
    repo_url: str
        Repository url from Github.
        Example: "https://github.com/jupyter-naas/awesome-notebooks"

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    columns = ["comment_id", "issue_url", "comment_by", "comment_body"]

    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    url = f"https://api.github.com/repos/{repository}/issues/comments"

    # Collect plain dicts and build the frame once; appending to a DataFrame
    # cell by cell is quadratic and coerces the integer ids to floats.
    records = []
    page = 1
    while True:
        params = {"per_page": 100, "page": page}
        res = requests.get(url, headers=git_obj.headers, params=params, timeout=30)
        res.raise_for_status()
        comments = res.json()
        if not comments:
            break
        for comment in comments:
            records.append(
                {
                    "comment_id": comment["id"],
                    "issue_url": comment["issue_url"],
                    # "user" may be null in the payload.
                    "comment_by": (comment.get("user") or {}).get("login"),
                    "comment_body": comment["body"],
                }
            )
        page += 1
    return pd.DataFrame(records, columns=columns)

Get Contributor Details

def _count_values(frame, column):
    """Return how often each value occurs in ``frame[column]`` (empty if the column is absent)."""
    if column not in frame.columns:
        return pd.Series(dtype="int64")
    return frame[column].value_counts()


def get_contributors_details(repo_url):
    """Build per-contributor activity counts for a repository.

    Contributors are read page by page from the ``contributors`` endpoint.
    Issue and comment counts come from the module-level ``df_all_issues`` and
    ``df_all_comments`` dataframes, which must exist before this is called.

    The returned dataframe has one row per contributor and these columns:
        contributor_name
        issues_created
        issues_closed
        issue_pr_comments
        commits

    Parameters
    ----------
    repo_url: str
        Repository url from Github.
        Example: "https://github.com/jupyter-naas/awesome-notebooks"

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    columns = [
        "contributor_name",
        "issues_created",
        "issues_closed",
        "issue_pr_comments",
        "commits",
    ]

    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    url = f"https://api.github.com/repos/{repository}/contributors"

    # Count once up front instead of re-filtering the full frames for every
    # contributor inside the loop.
    created_counts = _count_values(df_all_issues, "issue_creator")
    closed_counts = _count_values(df_all_issues, "issue_closed_by")
    comment_counts = _count_values(df_all_comments, "comment_by")

    records = []
    page = 1
    while True:
        params = {"per_page": 100, "page": page}
        res = requests.get(url, headers=git_obj.headers, params=params, timeout=30)
        res.raise_for_status()
        contributors = res.json()
        if not contributors:
            break
        for contributor in contributors:
            login = contributor["login"]
            records.append(
                {
                    "contributor_name": login,
                    "issues_created": int(created_counts.get(login, 0)),
                    "issues_closed": int(closed_counts.get(login, 0)),
                    "issue_pr_comments": int(comment_counts.get(login, 0)),
                    # Use the repo_url argument (not the REPO_URL global) so
                    # the commits belong to the repository being analysed.
                    "commits": len(git_obj.repos.get_commits(repo_url, login)),
                }
            )
        page += 1
    return pd.DataFrame(records, columns=columns)

Get leaderboard

def get_leaderboard(df):
    """Rank contributors by score.

    Returns a new dataframe whose rows are sorted by the ``pts`` column,
    highest first, and renumbered from 0. The input dataframe is not
    modified.
    """
    ranked = df.sort_values(by="pts", ascending=False)
    ranked = ranked.reset_index(drop=True)
    return ranked

Set Key Variables from function call

# Fetch every issue of the repository; the request uses state=all, so the
# count printed below covers open and closed issues alike.
df_all_issues = get_all_issues(REPO_URL)
print("Total issues fetched (open and closed):", len(df_all_issues))
# df_all_issues.head()

# Fetch every issue / pull-request comment.
df_all_comments = get_issue_pr_comments(REPO_URL)
# df_all_comments.head()

# Must run after the two frames above exist: get_contributors_details reads
# df_all_issues and df_all_comments as module-level globals.
df_contributors = get_contributors_details(REPO_URL)
# df_contributors.head()

Add pts column to contributor dataframe

# Score each contributor: every activity column is multiplied by a
# pre-defined weight and the weighted columns are summed into "pts".
# Feel free to modify the weights.
df_contributors["pts"] = (
    1 * df_contributors["issues_created"]
    + 1 * df_contributors["issues_closed"]
    + 0.5 * df_contributors["issue_pr_comments"]
    + 1 * df_contributors["commits"]
)

Output

# Rank the contributors by their "pts" score, best first.
df_leaderboard = get_leaderboard(df_contributors)

print("Here are the top 10 contributors \n")
# Last expression of the cell, so the notebook renders the first ten rows.
df_leaderboard.head(10)