Path: blob/master/GitHub/GitHub_Get_interactions_from_repository_url.ipynb
2973 views
Kernel: Python 3
GitHub - Get interactions from repository url
Tags: #github #issues #reaction #comment #creator #assignees #automation
Author: Varsha Kumar
Last update: 2024-06-17 (Created: 2024-06-11)
Description: This notebook allows users to retrieve interaction data from issues on a given repository.
Input
Import libraries
In [1]:
import requests import re import pandas as pd import naas_python as naas from urllib.parse import urlparse import time import plotly.express as px from naas_drivers import github import ipywidgets as widgets from IPython.display import display, clear_output
Setup variables
github_token: GitHub personal access token used to authenticate API requests
repo_url: link to the chosen GitHub repository
output_csv: name of the output CSV file
In [3]:
# GitHub personal access token, read from the naas secret store.
github_token = naas.secret.get("GITHUB_TOKEN").value

# Repository whose issue interactions will be collected.
repo_url = "https://github.com/jupyter-naas/workspace"

# The CSV is named after the "<owner>/<repo>" part of the URL, with every
# "/" replaced by "_" so the result is a single file name.
output_csv = (
    repo_url.split('github.com/')[1].replace('/', '_')
    + "_interactions.csv"
)
print(output_csv)
Model
Get issues and the issue type
In [4]:
def get_github_owner_repo(repo_url):
    """Extract the owner and repository name from a GitHub repository URL.

    The last two ``/``-separated segments of the URL are taken as the owner
    and the repository. Surrounding whitespace, trailing slashes and a
    trailing ``.git`` (as found in clone URLs) are ignored.

    Args:
        repo_url: Repository URL, e.g. ``https://github.com/owner/repo``.

    Returns:
        An ``(owner, repo)`` tuple of strings.

    Raises:
        IndexError: If the URL contains no ``/`` at all.
    """
    cleaned = repo_url.strip().rstrip('/')
    # Clone URLs end in ".git", which is not part of the repository name.
    if cleaned.endswith('.git'):
        cleaned = cleaned[:-len('.git')].rstrip('/')
    parts = cleaned.split('/')
    owner = parts[-2]
    repo = parts[-1]
    return owner, repo


def get_issues(owner, repo, token, limit=1000, timeout=30):
    """Fetch up to ``limit`` issues of a repository from the GitHub REST API.

    Pages of 100 items are requested until ``limit`` items were collected or
    the API returns an empty page. No ``state`` filter is sent, so the API's
    default filter applies.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        token: GitHub personal access token.
        limit: Maximum number of issues to return.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of at most ``limit`` issue dictionaries as returned by the API.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    api_url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }
    issues = []
    page = 1
    while len(issues) < limit:
        response = requests.get(
            api_url,
            headers=headers,
            params={'per_page': 100, 'page': page},
            timeout=timeout,  # never hang forever on a stalled connection
        )
        if response.status_code != 200:
            raise Exception(f"Error: {response.status_code} - {response.text}")
        page_issues = response.json()
        if not page_issues:
            break
        issues.extend(page_issues)
        page += 1
    # The last page may have pushed the list past the limit.
    return issues[:limit]
Get interactions from issues
In [5]:
# Seconds to wait for a GitHub response before giving up on a request.
_REQUEST_TIMEOUT = 30


def _auth_headers(token, accept='application/vnd.github.v3+json'):
    """Build the authentication headers sent with every GitHub API call."""
    return {
        'Authorization': f'token {token}',
        'Accept': accept,
    }


def _raise_unless_ok(response):
    """Raise the generic API error when the response status is not 200."""
    if response.status_code != 200:
        raise Exception(f"Error: {response.status_code} - {response.text}")


def _get_all_pages(url, headers, params=None, check=_raise_unless_ok):
    """Fetch a list endpoint and every following page, returning all items.

    A single request only returns the first page of a GitHub list endpoint,
    so longer lists would be silently truncated. The next page is taken from
    the ``Link`` response header until there is none left.

    Args:
        url: URL of the list endpoint.
        headers: HTTP headers to send with each request.
        params: Optional query parameters for the first request.
        check: Callable receiving each response; it must raise on failure.

    Returns:
        A list with the items of all pages, in the order received.
    """
    query = {'per_page': 100}
    query.update(params or {})
    items = []
    while url:
        response = requests.get(
            url, headers=headers, params=query, timeout=_REQUEST_TIMEOUT
        )
        check(response)
        items.extend(response.json())
        url = response.links.get('next', {}).get('url')
        # The "next" URL already carries the complete query string.
        query = None
    return items


# Get organization owner(s)
def get_repository_info(repo_url, token):
    """Return the repository description found at the API URL ``repo_url``.

    Args:
        repo_url: API URL of the repository.
        token: GitHub personal access token.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    response = requests.get(
        repo_url, headers=_auth_headers(token), timeout=_REQUEST_TIMEOUT
    )
    _raise_unless_ok(response)
    return response.json()


def get_organization_members(org_name, token, role='all'):
    """Return all members of an organization, optionally filtered by role.

    Args:
        org_name: Login of the organization.
        token: GitHub personal access token.
        role: Value sent as the ``role`` query parameter.

    Returns:
        A list of member dictionaries covering every result page.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    api_url = f"https://api.github.com/orgs/{org_name}/members"
    return _get_all_pages(api_url, _auth_headers(token), params={'role': role})


def get_organization_owners(org_name, token):
    """Return the organization members that have the ``admin`` role."""
    return get_organization_members(org_name, token, role='admin')


# Get pull request commit(s)
def get_pull_request_commits(pull_request_url, token):
    """Return all commits of the pull request at ``pull_request_url``.

    Args:
        pull_request_url: API URL of the pull request.
        token: GitHub personal access token.

    Returns:
        A list of commit dictionaries covering every result page.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    return _get_all_pages(pull_request_url + "/commits", _auth_headers(token))


# Get comments and reactions from issues
def get_all_comments(api_url, personal_access_token):
    """Return all comments of the issue at ``api_url``.

    Args:
        api_url: API URL of the issue.
        personal_access_token: GitHub personal access token.

    Returns:
        A list of comment dictionaries covering every result page.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    def check(response):
        if response.status_code != 200:
            raise Exception(f"Error fetching comments from GitHub: {response.status_code} {response.reason}")

    return _get_all_pages(
        api_url + '/comments',
        _auth_headers(personal_access_token),
        check=check,
    )


def get_comment_reactions(comment_url, personal_access_token):
    """Return all reactions of the comment at ``comment_url``.

    Args:
        comment_url: API URL of the comment.
        personal_access_token: GitHub personal access token.

    Returns:
        A list of reaction dictionaries covering every result page.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    headers = _auth_headers(
        personal_access_token,
        accept="application/vnd.github.squirrel-girl-preview+json",
    )
    return _get_all_pages(
        comment_url + "/reactions",
        headers,
        check=lambda response: response.raise_for_status(),
    )
Get table of interactions
In [6]:
data = []


def get_interactions(
    url,
    uid,
    issue_state,
    owners,
    contributors,
    interaction_type,
    content,
    time,
    user_login,
    user_id
):
    """Build one row of the interactions table.

    Args:
        url: API URL of the issue the interaction belongs to.
        uid: Identifier of the interaction.
        issue_state: State of the issue, or ``'Pull Request'``.
        owners: Comma-separated organization owner logins, or ``"NONE"``.
        contributors: Collection of logins that took part in the issue.
        interaction_type: Kind of interaction, e.g. ``"COMMENT"``.
        content: Text of the interaction.
        time: Timestamp of the interaction.
        user_login: Login of the user behind the interaction.
        user_id: Numeric id of the user behind the interaction.

    Returns:
        A dictionary keyed by the column names of the output table.
    """
    return {
        "URL": url,
        "ID": uid,
        "ISSUE_STATE": issue_state,
        "ORGANIZATION_OWNER(S)": owners,
        # Use the argument rather than whatever global happens to be bound.
        "CONTRIBUTOR(S)": contributors,
        "INTERACTION_TYPE": interaction_type,
        "CONTENT": content,
        "DATE_TIME": time,
        "USER_LOGIN": user_login,
        "USER_ID": user_id,
    }


owner, repo = get_github_owner_repo(repo_url)
issues = get_issues(owner, repo, github_token)

# The organization owners belong to the repository, not to a single issue,
# so they are looked up once instead of once per issue. The repository API
# URL is the issue URL without its last two segments.
owners_output = ""
if issues:
    api_repo_url = '/'.join(issues[0]['url'].split('/')[:-2])
    repo_data = get_repository_info(api_repo_url, github_token)
    if 'organization' in repo_data['owner']['type'].lower():
        org_name = repo_data['owner']['login']
        org_owners = get_organization_owners(org_name, github_token)
        owners_output = ", ".join(member['login'] for member in org_owners)
    else:
        owners_output = "NONE"

for issue in issues:
    # Logins of everyone involved in this issue. All rows of the issue hold
    # a reference to this same set, so each of them ends up listing every
    # participant found while the issue was processed.
    participants = set()

    # Basic issue data
    is_pull_request = 'pull_request' in issue
    issue_state = 'Pull Request' if is_pull_request else issue['state']
    issue_number = issue['number']
    issue_url = issue['url']
    issue_content = issue['title']
    issue_time = issue['created_at']
    issue_user_login = issue['user']['login']
    issue_user_id = issue['user']['id']
    participants.add(issue_user_login)

    # Get creator
    data.append(get_interactions(
        issue_url,
        f"{issue_number}_{issue_user_id}",
        issue_state,
        owners_output,
        participants,
        "CREATOR",
        issue_content,
        issue_time,
        issue_user_login,
        issue_user_id,
    ))

    # Get assignees
    for assignee in issue['assignees']:
        participants.add(assignee['login'])
        data.append(get_interactions(
            issue_url,
            f"{issue_number}_{assignee['id']}",
            issue_state,
            owners_output,
            participants,
            "ASSIGNEE",
            issue_content,
            issue_time,
            assignee['login'],
            assignee['id'],
        ))

    # Get comments
    for comment in get_all_comments(issue_url, github_token):
        comment_user_login = comment['user']['login']
        participants.add(comment_user_login)
        data.append(get_interactions(
            issue_url,
            f"{issue_number}_{comment['id']}",
            issue_state,
            owners_output,
            participants,
            "COMMENT",
            comment['body'],
            comment['created_at'],
            comment_user_login,
            comment['user']['id'],
        ))

        # Get reactions on the comment
        for reaction in get_comment_reactions(comment['url'], github_token):
            reaction_user_login = reaction['user']['login']
            participants.add(reaction_user_login)
            data.append(get_interactions(
                issue_url,
                f"{issue_number}_{reaction['id']}",
                issue_state,
                owners_output,
                participants,
                "REACTION",
                reaction['content'],
                reaction['created_at'],
                reaction_user_login,
                reaction['user']['id'],
            ))

    # Get commits of pull requests
    if is_pull_request:
        pull_request_url = issue['pull_request']['url']
        for commit in get_pull_request_commits(pull_request_url, github_token):
            # The API reports a null author when the commit cannot be
            # matched to a GitHub account; keep the commit but leave the
            # user columns empty instead of crashing.
            author = commit['author'] or {}
            commit_user_login = author.get('login')
            if commit_user_login is not None:
                participants.add(commit_user_login)
            data.append(get_interactions(
                issue_url,
                f"{issue_number}_{commit['node_id']}",
                issue_state,
                owners_output,
                participants,
                "COMMIT",
                commit['commit']['message'],
                commit['commit']['author']['date'],
                commit_user_login,
                author.get('id'),
            ))

df = pd.DataFrame(data)
Output
Save DataFrame to csv
In [7]:
# Write the interactions table to the configured CSV file, leaving out the
# DataFrame index column.
df.to_csv(path_or_buf=output_csv, index=False)
In [ ]: