Path: blob/master/GitHub/GitHub_Get_interactions_from_open_issues.ipynb
2973 views
Kernel: Python 3
GitHub - Get interactions from open issues
Tags: #github #openIssues #reaction #comment #creator #assignees #automation
Author: Varsha Kumar
Last update: 2024-06-10 (Created: 2024-06-04)
Description: This notebook allows users to retrieve all comments, reactions and creator/assignee data from open issues on a given repository.
Input
In [1]:
import requests import re import pandas as pd import naas_python as naas from urllib.parse import urlparse import time import plotly.express as px from naas_drivers import github
Setup variables
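github_token: personal access token used to authenticate with the GitHub API
repo_url: link to the chosen GitHub repository
state: state of the issues to retrieve (e.g. open)
output_csv: name of the CSV file the interactions can be saved to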
In [2]:
# Credentials: the token is fetched from the naas secret store instead of being hardcoded.
github_token = naas.secret.get("GITHUB_TOKEN").value

# Repository to analyse and the issue state to retrieve.
repo_url = "https://github.com/jupyter-naas/awesome-notebooks"
state = 'open'

# CSV file name built from the "owner/repo" part of the URL and the issue state.
output_csv = f"{repo_url.split('github.com/')[1].replace('/', '_')}_{state}_issues.csv"
Model
Filter on open issues without pull requests
In [3]:
def get_filtered_open_issues(github_url, personal_access_token, state="open"):
    """Return all issues of a GitHub repository, excluding pull requests.

    Args:
        github_url: Repository URL in the format ``https://github.com/owner/repo``.
        personal_access_token: GitHub token sent in the ``Authorization`` header.
        state: Value sent as the ``state`` query parameter. Defaults to
            ``"open"``. Passed explicitly so the function no longer depends
            on a notebook-level global.

    Returns:
        list: Issue payloads (as decoded from the JSON response) that do not
        contain a ``pull_request`` key.

    Raises:
        ValueError: If the URL path is not exactly ``owner/repo``.
        Exception: If the API responds with a status code other than 200.
    """
    # Parse the GitHub URL to get the owner and repo name
    path_parts = urlparse(github_url).path.strip('/').split('/')
    if len(path_parts) != 2:
        raise ValueError("Invalid GitHub URL. It should be in the format: https://github.com/owner/repo")
    owner, repo = path_parts

    # GitHub API URL for issues
    api_url = f"https://api.github.com/repos/{owner}/{repo}/issues"

    # Set up headers with personal access token for authentication
    headers = {
        'Authorization': f'token {personal_access_token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    # Number of issues per page (100 is the maximum allowed by the GitHub API)
    per_page = 100
    params = {
        'state': state,
        'filter': 'all',
        'per_page': per_page
    }

    filtered_issues = []
    page = 1
    while True:
        params['page'] = page
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(api_url, headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Error fetching issues from GitHub: {response.status_code} {response.reason}")

        issues = response.json()
        if not issues:
            break

        # The issues endpoint also returns pull requests; keep plain issues only.
        filtered_issues.extend(issue for issue in issues if 'pull_request' not in issue)

        # A page that is not full is the last one: skip the extra empty request.
        if len(issues) < per_page:
            break
        page += 1

    return filtered_issues


filtered_issues = get_filtered_open_issues(repo_url, github_token, state=state)
Get interactions from issues
In [4]:
# Get comments and reactions from issues

# Page size requested from the GitHub API and per-request timeout in seconds.
_PER_PAGE = 100
_REQUEST_TIMEOUT = 30


def _get_all_pages(url, headers, check_response):
    """Collect the items of every page of a paginated GitHub list endpoint.

    ``check_response`` is called with each response before its JSON body is
    read, so each caller keeps its own error-raising behaviour.
    """
    items = []
    page = 1
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={"per_page": _PER_PAGE, "page": page},
            timeout=_REQUEST_TIMEOUT,
        )
        check_response(response)
        batch = response.json()
        items.extend(batch)
        # A page that is not full is the last one.
        if len(batch) < _PER_PAGE:
            return items
        page += 1


def get_all_comments(api_url, personal_access_token):
    """Return every comment of an issue, following pagination.

    Args:
        api_url: API URL of the issue; ``/comments`` is appended to it.
        personal_access_token: GitHub token sent in the ``Authorization`` header.

    Returns:
        list: Comment payloads decoded from the JSON responses.

    Raises:
        Exception: If any page is answered with a status code other than 200.
    """
    # Set up headers with personal access token for authentication
    headers = {
        'Authorization': f'token {personal_access_token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    def _check(response):
        if response.status_code != 200:
            raise Exception(f"Error fetching comments from GitHub: {response.status_code} {response.reason}")

    return _get_all_pages(api_url + '/comments', headers, _check)


def get_comment_reactions(comment_url, personal_access_token):
    """Return every reaction of a comment, following pagination.

    Args:
        comment_url: API URL of the comment; ``/reactions`` is appended to it.
        personal_access_token: GitHub token sent in the ``Authorization`` header.

    Returns:
        list: Reaction payloads decoded from the JSON responses.

    Raises:
        Whatever ``response.raise_for_status()`` raises on an error status.
    """
    headers = {
        "Authorization": f"token {personal_access_token}",
        "Accept": "application/vnd.github.squirrel-girl-preview+json"
    }
    return _get_all_pages(
        comment_url + "/reactions",
        headers,
        lambda response: response.raise_for_status(),
    )


def get_interactions(
    url,
    uid,
    interaction_type,
    content,
    time,
    user_login,
    user_id,
):
    """Build one interaction record (a dict) from its individual fields.

    Note: the ``time`` parameter shadows the ``time`` module inside this
    function only; the name is kept so existing keyword callers still work.
    """
    return {
        "URL": url,
        "ID": uid,
        "TYPE": interaction_type,
        "CONTENT": content,
        "DATE_TIME": time,
        "USER_LOGIN": user_login,
        "USER_ID": user_id,
    }
Output
Display table of interactions
In [5]:
# Flatten every issue into one record per interaction:
# its creator, each assignee, each comment and each reaction on a comment.
data = []
for issue in filtered_issues:
    issue_url = issue['url']
    issue_number = str(issue['number'])
    issue_title = issue['title']
    issue_created_at = issue['created_at']
    creator = issue['user']

    # Creator of the issue
    data.append(
        get_interactions(
            url=issue_url,
            uid=issue_number + "_" + str(creator['id']),
            interaction_type="CREATOR",
            content=issue_title,
            time=issue_created_at,
            user_login=creator['login'],
            user_id=creator['id'],
        )
    )

    # Assignees share the issue's title and creation date
    for assignee in issue['assignees']:
        data.append(
            get_interactions(
                url=issue_url,
                uid=issue_number + "_" + str(assignee['id']),
                interaction_type="ASSIGNEE",
                content=issue_title,
                time=issue_created_at,
                user_login=assignee['login'],
                user_id=assignee['id'],
            )
        )

    # Comments, each followed by the reactions it received
    for comment in get_all_comments(issue_url, github_token):
        comment_author = comment['user']
        data.append(
            get_interactions(
                url=issue_url,
                uid=comment['id'],
                interaction_type="COMMENT",
                content=comment['body'],
                time=comment['created_at'],
                user_login=comment_author['login'],
                user_id=comment_author['id'],
            )
        )

        for reaction in get_comment_reactions(comment['url'], github_token):
            reaction_author = reaction['user']
            data.append(
                get_interactions(
                    url=issue_url,
                    uid=reaction['id'],
                    interaction_type="REACTION",
                    content=reaction['content'],
                    time=reaction['created_at'],
                    user_login=reaction_author['login'],
                    user_id=reaction_author['id'],
                )
            )

df = pd.DataFrame(data)
print(len(df))
df.head(1)
Save DataFrame to csv
In [6]:
# df.to_csv(output_csv, index=False)
Display table with interactions count
In [6]:
def get_interactions_count(df):
    """Count interactions per user, most active user first.

    Named differently from the record-building ``get_interactions`` helper
    defined earlier in the notebook so that this cell no longer shadows it
    (re-running the collection cell after this one used to fail).

    Args:
        df: DataFrame with at least the ``USER_LOGIN`` and ``ID`` columns.

    Returns:
        DataFrame with columns ``USER_LOGIN`` and ``NUMBER_OF_INTERACTIONS``,
        sorted by ``NUMBER_OF_INTERACTIONS`` descending with a fresh index.
    """
    return (
        df.groupby(["USER_LOGIN"], as_index=False)
        .agg({"ID": "count"})
        .rename(columns={"ID": "NUMBER_OF_INTERACTIONS"})
        .sort_values(by="NUMBER_OF_INTERACTIONS", ascending=False)
        .reset_index(drop=True)
    )


df_statistics = get_interactions_count(df)
df_statistics
Display on bar chart
In [7]:
def create_barchart(df, url):
    """Draw a horizontal bar chart of the number of interactions per user.

    Args:
        df: DataFrame with ``USER_LOGIN`` and ``NUMBER_OF_INTERACTIONS`` columns.
        url: Repository URL; its last ``/``-separated segment is used in the title.

    Returns:
        The plotly figure, after it has been shown.
    """
    # Repository name = last segment of the URL
    repository = url.rsplit("/", 1)[-1]

    # Ascending sort before plotting the horizontal bars
    ordered = df.sort_values(by="NUMBER_OF_INTERACTIONS")

    # Total number of interactions, displayed in the subtitle
    interactions = ordered["NUMBER_OF_INTERACTIONS"].sum()

    fig = px.bar(
        ordered,
        y="USER_LOGIN",
        x="NUMBER_OF_INTERACTIONS",
        orientation="h",
        title=f"GitHub - {repository} : Issue interactions by user <br><span style='font-size: 13px;'>Total interactions: {interactions}</span>",
        text="NUMBER_OF_INTERACTIONS",
        labels={"USER_LOGIN": "Login", "NUMBER_OF_INTERACTIONS": "Number of Interactions"},
    )
    fig.update_traces(marker_color="black")

    layout = dict(
        plot_bgcolor="#ffffff",
        width=1200,
        height=800,
        font={"family": "Arial", "size": 14, "color": "black"},
        paper_bgcolor="white",
        xaxis_title=None,
        xaxis_showticklabels=False,
        yaxis_title=None,
        margin_pad=10,
    )
    fig.update_layout(**layout)
    fig.show()
    return fig


fig = create_barchart(df_statistics, repo_url)
Display table with top user data
In [13]:
# Top user = first row of the statistics table
top_user = df_statistics.at[0, 'USER_LOGIN']

# Boolean mask of the rows belonging to the top user, reused for every type
is_top_user = df['USER_LOGIN'] == top_user

# Number of creations, assignments, comments and reactions of the top user
top_user_creations = (is_top_user & (df['TYPE'] == 'CREATOR')).sum()
top_user_assignees = (is_top_user & (df['TYPE'] == 'ASSIGNEE')).sum()
top_user_comments = (is_top_user & (df['TYPE'] == 'COMMENT')).sum()
top_user_reactions = (is_top_user & (df['TYPE'] == 'REACTION')).sum()

top_user_data = [
    {"TYPE": "Creations", "AMOUNT": top_user_creations},
    {"TYPE": "Assignments", "AMOUNT": top_user_assignees},
    {"TYPE": "Comments", "AMOUNT": top_user_comments},
    {"TYPE": "Reactions", "AMOUNT": top_user_reactions},
]

df_top_user_data = pd.DataFrame(top_user_data)
df_top_user_data
Display on bar chart
In [17]:
# NOTE: this rebinds `create_barchart`, replacing the two-argument version
# defined in an earlier cell of this notebook.
def create_barchart(df, user, url):
    """Draw a horizontal bar chart of the top user's interactions per type.

    Args:
        df: DataFrame with ``TYPE`` and ``AMOUNT`` columns.
        user: Login shown as "Top user" in the chart subtitle.
        url: Repository URL; its last ``/``-separated segment is used in the title.

    Returns:
        The plotly figure, after it has been shown.
    """
    # Repository name = last segment of the URL
    repository = url.rsplit("/", 1)[-1]

    # Ascending sort before plotting the horizontal bars
    ordered = df.sort_values(by="AMOUNT")

    # Total number of interactions, displayed in the subtitle
    interactions = ordered["AMOUNT"].sum()

    fig = px.bar(
        ordered,
        y="TYPE",
        x="AMOUNT",
        orientation="h",
        title=f"GitHub - {repository} : Issue interactions by top user <br><span style='font-size: 13px;'>Total interactions: {interactions}<br>Top user: {user}</span>",
        text="AMOUNT",
        labels={"TYPE": "type", "AMOUNT": "amount"},
    )
    fig.update_traces(marker_color="black")

    layout = dict(
        plot_bgcolor="#ffffff",
        width=1200,
        height=800,
        font={"family": "Arial", "size": 14, "color": "black"},
        paper_bgcolor="white",
        xaxis_title=None,
        xaxis_showticklabels=False,
        yaxis_title=None,
        margin_pad=10,
    )
    fig.update_layout(**layout)
    fig.show()
    return fig


fig = create_barchart(df_top_user_data, top_user, repo_url)
In [ ]: