Path: blob/master/scenario-notebooks/Automated-Notebooks/AutomationGallery-CredentialScanOnAzureDataExplorer.ipynb
3253 views
Kernel: Synapse PySpark
Automation Gallery - Credential Scan on Azure Data Explorer
Notebook Version: 1.0
Python Version: Python 3.8
Apache Spark Version: 3.1
Required Packages: azure-kusto-data, azure-mgmt-kusto
Platforms Supported: Azure Synapse Analytics
Data Source Required: Data Explorer data tables
Description
This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Data Explorer using Azure SDK for Python and KQL.
*** Need to download and install Python modules for Azure Data Explorer. ***
*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***
Need to know more about KQL? Getting started with Kusto Query Language.
Table of Contents
Warm-up
Azure Authentication
Azure data Explorer Queries
1. Warm-up
In [ ]:
# If you need to know what Python modules are available, you may run this: # help("modules") # During the installation, there maybe some incompatible errors, you can safely ignore them. #!pip install azure-kusto-data #!pip install azure-mgmt-kusto --upgrade
In [ ]:
# Load Python libraries that will be used in this notebook from azure.mgmt.resource import ResourceManagementClient from azure.mgmt.kusto import KustoManagementClient from azure.kusto.data.exceptions import KustoServiceError from azure.kusto.data.helpers import dataframe_from_result_table from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential import time import pandas as pd import json import ipywidgets from IPython.display import display, HTML, Markdown import warnings
In [ ]:
# Functions will be used in this notebook def get_credscan_kql_where_clause(column_name): "This function return the KQL where clause for credscan" where_clause = " | where {0} " regex_string = "" regex_list = [ r"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\- ]?(key|secret)[^,a-z]|\\.azuredatabricks\\.net).{0,10}(dapi)?[a-z0-9/+]{22}", r"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\.[a-z0-9/+]{40}[^a-z0-9/+])", r"(?-i)\\WAIza(?i)[a-z0-9_\\\\\\-]{35}\\W", r"(?i)(\\Wsig\\W|Secret(Value)?|IssuerSecret|(\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\.azure\\-devices\\.net|\\.(core|servicebus|redis\\.cache|accesscontrol|mediaservices)\\.(windows\\.net|chinacloudapi\\.cn|cloudapi\\.de|usgovcloudapi\\.net)|New\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)", r"(?i)visualstudio\\.com.{1,100}\\W(?-i)[a-z2-7]{52}\\W", r"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d", r"(?i)(x-functions-key|ApiKey|Code=|\\.azurewebsites\\.net/api/).{0,100}[a-z0-9/\\+]{54}={2}", r"(?i)code=[a-z0-9%]{54,74}(%3d){2}", r"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\+]{60}\\W", r"(?i)[^a-z0-9/\\+][a-z0-9/\\+]{86}==", r"(?-i)\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\-{5}", r"(?i)(app(lication)?|client)[_\\- ]?(key(url)?|secret)([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})[^\\-]", r"(?i)refresh[_\\-]?token([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})(\"data:text/plain,.+\"|[a-z0-9/+=_.-]{20,200})", r"(?i)AccessToken(Secret)?([\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2}|[\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}", r"(?i)[a-z0-9]{3,5}://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+", r"(?i)snmp(\\-server)?\\.exe.{0,100}(priv|community)", r"(?i)(ConvertTo\\-?SecureString\\s*((\\(|\\Wstring)\\s*)?['\"]+)", r"(?i)(Consumer|api)[_\\- ]?(Secret|Key)([\\s=:>]{1,10}|[\\s\"':=|>,\\]]{3,15}|[\"'=:\\(]{2})[^\\s]{5,}", r"(?i)authorization[,\\[:= \"']+([dbaohmnsv])", r"(?i)-u\\s+.{2,100}-p\\s+[^\\-/]", r"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+", r"(?i)(\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\W[a-z0-9/\\+]{40}", r"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)", r"(?i)@(\\.(on)?)?microsoft\\.com[ -~\\s]{1,100}?(\\w?pass\\w?)", r"(?i)net(\\.exe)?.{1,5}(user\\s+|share\\s+/user:|user-?secrets? set)\\s+[a-z0-9]", r"(?i)xox[pbar]\\-[a-z0-9]", r"(?i)[\":\\s=]((x?corp|extranet(test)?|ntdev)(\\.microsoft\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\W.{0,100}(password|\\Wpwd|\\Wpass|\\Wpw\\W|userpass)", r"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\s]{0,100}?@([a-z0-9.]+\\.(on)?)?microsoft\\.com['\"]?", r"(?i)(\\.database\\.azure\\.com|\\.database(\\.secure)?\\.windows\\.net|\\.cloudapp\\.net|\\.database\\.usgovcloudapi\\.net|\\.database\\.chinacloudapi\\.cn|\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\Wpwd\\W)", r"(?i)(secret(.?key)?|password)[\"']?\\s*[:=]\\s*[\"'][^\\s]+?[\"']", r"(?i)[^a-z\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\s?account)\\s*[^\\w\\s,]([ -~\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\s\\$]|\\s)\\s*(DB_PASS|(sql|service)?password|pwd)", r"(?i)(password|secret(key)?)[ \\t]*[=:]+[ \\t]*([^:\\s\"';,<]{2,200})", ] for (i, re_str) in enumerate(regex_list): if i != 0: if i == 27: regex_string += " and " else: regex_string += " or " regex_string += " " + column_name + " matches regex \"" + re_str + "\"" return where_clause.format(regex_string)
2. Azure Authentication
In [ ]:
tenant_id = '' subscription_id = '' akv_name = '' client_id_name = '' client_secret_name = '' akv_link_name = '' group_name = '' adx_cluster_name = ''
In [ ]:
client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name) client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)
In [ ]:
credential = ClientSecretCredential( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)
In [ ]:
kusto_client = KustoManagementClient(credential, subscription_id = subscription_id) if kusto_client != None: cluster_list = list(kusto_client.clusters.list_by_resource_group(group_name)) if cluster_list != None: cluster_uris = [c.uri for c in cluster_list if c.name == adx_cluster_name] database_list = kusto_client.databases.list_by_cluster(group_name, cluster_name=adx_cluster_name) kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster_uris[0], client_id, client_secret, tenant_id) kusto_data_client = KustoClient(kcsb)
3. Azure Data Explorer Queries
In [ ]:
if kusto_data_client != None and database_list != None: database_name_list = sorted([c.name for c in database_list]) for database_name in database_name_list: db_name = database_name.split("/")[1] tables = kusto_data_client.execute_mgmt(db_name, ".show tables details") table_list = dataframe_from_result_table(tables.primary_results[0]).TableName.tolist() if table_list != None: #table_dropdown = ipywidgets.Dropdown(options=sorted(table_list), description='Tables:') column_name = "*" for table in table_list: kql_where_clause = get_credscan_kql_where_clause(column_name) query = "{0} {1}".format(table, kql_where_clause) print(table) try: # Run query result = kusto_data_client.execute_query(database=db_name, query=query) # Display Result final_result = dataframe_from_result_table(result.primary_results[0]) if final_result.size != 0: display(final_result) except: print("=============================="); print(" This table got http error:") print("==============================");