Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Azure
GitHub Repository: Azure/Azure-Sentinel-Notebooks
Path: blob/master/scenario-notebooks/Automated-Notebooks/AutomationGallery-CredentialScanOnAzureDataExplorer.ipynb
3253 views
Kernel: Synapse PySpark

Automation Gallery - Credential Scan on Azure Data Explorer

Notebook Version: 1.0
Python Version: Python 3.8
Apache Spark Version: 3.1
Required Packages: azure-kusto-data, azure-mgmt-kusto
Platforms Supported: Azure Synapse Analytics

Data Source Required: Data Explorer data tables

Description

This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Data Explorer using Azure SDK for Python and KQL.
*** Need to download and install Python modules for Azure Data Explorer. ***
*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***
Need to know more about KQL? Getting started with Kusto Query Language.

Table of Contents

  1. Warm-up

  2. Azure Authentication

  3. Azure data Explorer Queries

1. Warm-up

# If you need to know what Python modules are available, you may run this: # help("modules") # During the installation, there maybe some incompatible errors, you can safely ignore them. #!pip install azure-kusto-data #!pip install azure-mgmt-kusto --upgrade
# Load Python libraries that will be used in this notebook from azure.mgmt.resource import ResourceManagementClient from azure.mgmt.kusto import KustoManagementClient from azure.kusto.data.exceptions import KustoServiceError from azure.kusto.data.helpers import dataframe_from_result_table from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential import time import pandas as pd import json import ipywidgets from IPython.display import display, HTML, Markdown import warnings
# Functions will be used in this notebook def get_credscan_kql_where_clause(column_name): "This function return the KQL where clause for credscan" where_clause = " | where {0} " regex_string = "" regex_list = [ r"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\- ]?(key|secret)[^,a-z]|\\.azuredatabricks\\.net).{0,10}(dapi)?[a-z0-9/+]{22}", r"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\.[a-z0-9/+]{40}[^a-z0-9/+])", r"(?-i)\\WAIza(?i)[a-z0-9_\\\\\\-]{35}\\W", r"(?i)(\\Wsig\\W|Secret(Value)?|IssuerSecret|(\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\.azure\\-devices\\.net|\\.(core|servicebus|redis\\.cache|accesscontrol|mediaservices)\\.(windows\\.net|chinacloudapi\\.cn|cloudapi\\.de|usgovcloudapi\\.net)|New\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)", r"(?i)visualstudio\\.com.{1,100}\\W(?-i)[a-z2-7]{52}\\W", r"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d", r"(?i)(x-functions-key|ApiKey|Code=|\\.azurewebsites\\.net/api/).{0,100}[a-z0-9/\\+]{54}={2}", r"(?i)code=[a-z0-9%]{54,74}(%3d){2}", r"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\+]{60}\\W", r"(?i)[^a-z0-9/\\+][a-z0-9/\\+]{86}==", r"(?-i)\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\-{5}", r"(?i)(app(lication)?|client)[_\\- ]?(key(url)?|secret)([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})[^\\-]", r"(?i)refresh[_\\-]?token([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})(\"data:text/plain,.+\"|[a-z0-9/+=_.-]{20,200})", r"(?i)AccessToken(Secret)?([\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2}|[\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}", r"(?i)[a-z0-9]{3,5}://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+", r"(?i)snmp(\\-server)?\\.exe.{0,100}(priv|community)", r"(?i)(ConvertTo\\-?SecureString\\s*((\\(|\\Wstring)\\s*)?['\"]+)", r"(?i)(Consumer|api)[_\\- ]?(Secret|Key)([\\s=:>]{1,10}|[\\s\"':=|>,\\]]{3,15}|[\"'=:\\(]{2})[^\\s]{5,}", r"(?i)authorization[,\\[:= \"']+([dbaohmnsv])", r"(?i)-u\\s+.{2,100}-p\\s+[^\\-/]", r"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+", r"(?i)(\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\W[a-z0-9/\\+]{40}", r"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)", r"(?i)@(\\.(on)?)?microsoft\\.com[ -~\\s]{1,100}?(\\w?pass\\w?)", r"(?i)net(\\.exe)?.{1,5}(user\\s+|share\\s+/user:|user-?secrets? set)\\s+[a-z0-9]", r"(?i)xox[pbar]\\-[a-z0-9]", r"(?i)[\":\\s=]((x?corp|extranet(test)?|ntdev)(\\.microsoft\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\W.{0,100}(password|\\Wpwd|\\Wpass|\\Wpw\\W|userpass)", r"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\s]{0,100}?@([a-z0-9.]+\\.(on)?)?microsoft\\.com['\"]?", r"(?i)(\\.database\\.azure\\.com|\\.database(\\.secure)?\\.windows\\.net|\\.cloudapp\\.net|\\.database\\.usgovcloudapi\\.net|\\.database\\.chinacloudapi\\.cn|\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\Wpwd\\W)", r"(?i)(secret(.?key)?|password)[\"']?\\s*[:=]\\s*[\"'][^\\s]+?[\"']", r"(?i)[^a-z\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\s?account)\\s*[^\\w\\s,]([ -~\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\s\\$]|\\s)\\s*(DB_PASS|(sql|service)?password|pwd)", r"(?i)(password|secret(key)?)[ \\t]*[=:]+[ \\t]*([^:\\s\"';,<]{2,200})", ] for (i, re_str) in enumerate(regex_list): if i != 0: if i == 27: regex_string += " and " else: regex_string += " or " regex_string += " " + column_name + " matches regex \"" + re_str + "\"" return where_clause.format(regex_string)

2. Azure Authentication

tenant_id = '' subscription_id = '' akv_name = '' client_id_name = '' client_secret_name = '' akv_link_name = '' group_name = '' adx_cluster_name = ''
client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name) client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)
credential = ClientSecretCredential( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)
kusto_client = KustoManagementClient(credential, subscription_id = subscription_id) if kusto_client != None: cluster_list = list(kusto_client.clusters.list_by_resource_group(group_name)) if cluster_list != None: cluster_uris = [c.uri for c in cluster_list if c.name == adx_cluster_name] database_list = kusto_client.databases.list_by_cluster(group_name, cluster_name=adx_cluster_name) kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster_uris[0], client_id, client_secret, tenant_id) kusto_data_client = KustoClient(kcsb)

3. Azure Data Explorer Queries

if kusto_data_client != None and database_list != None: database_name_list = sorted([c.name for c in database_list]) for database_name in database_name_list: db_name = database_name.split("/")[1] tables = kusto_data_client.execute_mgmt(db_name, ".show tables details") table_list = dataframe_from_result_table(tables.primary_results[0]).TableName.tolist() if table_list != None: #table_dropdown = ipywidgets.Dropdown(options=sorted(table_list), description='Tables:') column_name = "*" for table in table_list: kql_where_clause = get_credscan_kql_where_clause(column_name) query = "{0} {1}".format(table, kql_where_clause) print(table) try: # Run query result = kusto_data_client.execute_query(database=db_name, query=query) # Display Result final_result = dataframe_from_result_table(result.primary_results[0]) if final_result.size != 0: display(final_result) except: print("=============================="); print(" This table got http error:") print("==============================");