Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Azure
GitHub Repository: Azure/Azure-Sentinel-Notebooks
Path: blob/master/scenario-notebooks/Automated-Notebooks/AutomationGallery-CredentialScanOnAzureLogAnalytics.ipynb
3253 views
Kernel: Synapse PySpark

Automation Gallery - Credential Scan on Azure Log Analytics

Notebook Version: 1.0
Python Version: Python 3.8
Apache Spark Version: 3.1
Required Packages: azure-monitor-query, azure-mgmt-loganalytics
Platforms Supported: Azure Synapse Analytics

Data Source Required: Log Analytics tables

Description

This notebook provides step-by-step instructions and sample code to detect credential leaks in Azure Log Analytics using the Azure SDK for Python and KQL.
*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***
Need to know more about KQL? Getting started with Kusto Query Language.

Table of Contents

  1. Warm-up

  2. Azure Authentication

  3. Azure Log Analytics Data Queries

  4. Save result to Microsoft Sentinel Dynamic Summaries

1. Warm-up

# Load Python libraries that will be used in this notebook from azure.mgmt.loganalytics import LogAnalyticsManagementClient from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential from azure.core.exceptions import HttpResponseError from datetime import datetime, timezone, timedelta import pandas as pd import json import re import ipywidgets from IPython.display import display, HTML, Markdown
# Functions will be used in this notebook
def get_credscan_kql_where_clause(column_name):
    """Build the KQL `| where ...` clause that credential-scans one column.

    Parameters:
        column_name: name of the column to test, or "*" to test every column
            of the table with a bare `* matches regex` predicate.

    Returns:
        A KQL fragment of the form
        " | where TimeGenerated > ago(7d) | where <regex predicates> "
        where the predicates OR together the CredScan detection regexes
        (indices 26 and 27 are deliberately joined with "and" — the
        username regex must co-occur with the password regex).
    """
    where_clause = " | where TimeGenerated > ago({0}) | where {1} "
    time_range = "7d"
    regex_string = ""
    # NOTE: backslashes are doubled because each pattern is embedded into a
    # double-quoted KQL string literal, which consumes one level of escaping.
    regex_list = [
        r"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\- ]?(key|secret)[^,a-z]|\\.azuredatabricks\\.net).{0,10}(dapi)?[a-z0-9/+]{22}",
        r"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\.[a-z0-9/+]{40}[^a-z0-9/+])",
        r"(?-i)\\WAIza(?i)[a-z0-9_\\\\\\-]{35}\\W",
        r"(?i)(\\Wsig\\W|Secret(Value)?|IssuerSecret|(\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\.azure\\-devices\\.net|\\.(core|servicebus|redis\\.cache|accesscontrol|mediaservices)\\.(windows\\.net|chinacloudapi\\.cn|cloudapi\\.de|usgovcloudapi\\.net)|New\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)",
        r"(?i)visualstudio\\.com.{1,100}\\W(?-i)[a-z2-7]{52}\\W",
        r"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d",
        r"(?i)(x-functions-key|ApiKey|Code=|\\.azurewebsites\\.net/api/).{0,100}[a-z0-9/\\+]{54}={2}",
        r"(?i)code=[a-z0-9%]{54,74}(%3d){2}",
        r"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\+]{60}\\W",
        r"(?i)[^a-z0-9/\\+][a-z0-9/\\+]{86}==",
        r"(?-i)\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\-{5}",
        r"(?i)(app(lication)?|client)[_\\- ]?(key(url)?|secret)([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})[^\\-]",
        r"(?i)refresh[_\\-]?token([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})(\"data:text/plain,.+\"|[a-z0-9/+=_.-]{20,200})",
        r"(?i)AccessToken(Secret)?([\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2}|[\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}",
        r"(?i)[a-z0-9]{3,5}://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+",
        r"(?i)snmp(\\-server)?\\.exe.{0,100}(priv|community)",
        r"(?i)(ConvertTo\\-?SecureString\\s*((\\(|\\Wstring)\\s*)?['\"]+)",
        r"(?i)(Consumer|api)[_\\- ]?(Secret|Key)([\\s=:>]{1,10}|[\\s\"':=|>,\\]]{3,15}|[\"'=:\\(]{2})[^\\s]{5,}",
        r"(?i)authorization[,\\[:= \"']+([dbaohmnsv])",
        r"(?i)-u\\s+.{2,100}-p\\s+[^\\-/]",
        r"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+",
        r"(?i)(\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\W[a-z0-9/\\+]{40}",
        r"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)",
        r"(?i)@(\\.(on)?)?microsoft\\.com[ -~\\s]{1,100}?(\\w?pass\\w?)",
        r"(?i)net(\\.exe)?.{1,5}(user\\s+|share\\s+/user:|user-?secrets? set)\\s+[a-z0-9]",
        r"(?i)xox[pbar]\\-[a-z0-9]",
        r"(?i)[\":\\s=]((x?corp|extranet(test)?|ntdev)(\\.microsoft\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\W.{0,100}(password|\\Wpwd|\\Wpass|\\Wpw\\W|userpass)",
        r"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\s]{0,100}?@([a-z0-9.]+\\.(on)?)?microsoft\\.com['\"]?",
        r"(?i)(\\.database\\.azure\\.com|\\.database(\\.secure)?\\.windows\\.net|\\.cloudapp\\.net|\\.database\\.usgovcloudapi\\.net|\\.database\\.chinacloudapi\\.cn|\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\Wpwd\\W)",
        r"(?i)(secret(.?key)?|password)[\"']?\\s*[:=]\\s*[\"'][^\\s]+?[\"']",
        r"(?i)[^a-z\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\s?account)\\s*[^\\w\\s,]([ -~\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\s\\$]|\\s)\\s*(DB_PASS|(sql|service)?password|pwd)",
        r"(?i)(password|secret(key)?)[ \\t]*[=:]+[ \\t]*([^:\\s\"';,<]{2,200})",
    ]
    for (i, re_str) in enumerate(regex_list):
        if i != 0:
            if i == 27:
                # The account-name pattern (26) must match together with the
                # sign-in/password pattern (27), hence "and" instead of "or".
                regex_string += " and "
            else:
                regex_string += " or "
        if column_name == "*":
            regex_string += " " + column_name + " matches regex \"" + re_str + "\""
        else:
            regex_string += " tostring(" + column_name + ") matches regex \"" + re_str + "\""
    return where_clause.format(time_range, regex_string)

def filter_column(column_name):
    """Return True when the column should be skipped by the credential scan.

    You may customize this filter to meet your requirements.

    BUG FIX: the original signature spelled the parameter 'comumn_name' while
    the body read the notebook-global 'column_name', so the filter silently
    tested the wrong value in most branches.  The parameter is now used
    consistently, the unused re.compile() call was dropped, and the implicit
    `None` fall-through is an explicit `return False`.
    """
    if column_name.find('Description') >= 0:
        # Description columns are free text and should still be scanned.
        return False
    elif column_name.find('Id') >= 0 or column_name.find('TimeGenerated') >= 0:
        # Identifier / timestamp columns are not interesting — skip them.
        return True
    else:
        # Skip system columns such as '_ResourceId' style names: an
        # underscore immediately followed by a lowercase letter.
        regex_str = '_[a-z]'
        results = re.findall(regex_str, column_name)
        return bool(results)
import uuid
import json

class DynamicSummary():
    """ Dynamic Summary object model

    Builds the JSON payload for the Microsoft Sentinel Dynamic Summaries
    REST API.  The same class is used both for the summary (top level) and
    for each summary item (rows serialized into ``rawContent``).

    Fixes applied to the original cell:
      * ``serializeItem`` contained the garbled name
        ``seriserialized_item_tsralized_str`` — a NameError whenever an item
        had ``relation_id`` set; corrected to ``serialized_item_tsr``.
      * ``construct_summary`` tested the undefined name ``summary_items``
        instead of its ``items`` parameter.
      * Self-less helpers are now explicit ``@staticmethod``s (they were
        already invoked via ``DynamicSummary.<name>(...)``).
      * ``requests`` is imported lazily inside the one method that needs it,
        and ``json`` is imported here so the cell is self-contained.
    """

    @staticmethod
    def get_new_guid():
        """ generate new GUID """
        return uuid.uuid4()

    def __init__(self, summary_id):
        # summary_id: GUID (string) identifying the dynamic summary resource
        self.summary_id = summary_id

    def serialize(self):
        """Serialize summary-level fields (plus items) into a JSON fragment.

        Returns the inner key/value list of the ``properties`` object;
        optional fields are emitted only when set and not None.
        """
        serialized_str = '"summaryId": "' + self.summary_id + '", "summaryName": "' + self.summary_name + '", "azureTenantId": "' + self.azure_tenant_id + '", "summaryDescription": "' + self.summary_description + '"'
        if hasattr(self, 'relation_name') and self.relation_name != None:
            serialized_str += ', "relationName": "' + self.relation_name + '"'
        if hasattr(self, 'relation_id') and self.relation_id != None:
            serialized_str += ', "relationId": "' + self.relation_id + '"'
        if hasattr(self, 'search_key') and self.search_key != None:
            serialized_str += ', "searchKey": "' + self.search_key + '"'
        if hasattr(self, 'tactics') and self.tactics != None:
            serialized_str += ', "tactics": "' + self.tactics + '"'
        if hasattr(self, 'techniques') and self.techniques != None:
            serialized_str += ', "techniques": "' + self.techniques + '"'
        if hasattr(self, 'source_info') and self.source_info != None:
            serialized_str += ', "sourceInfo": "' + self.source_info + '"'
        if hasattr(self, 'summary_items') and self.summary_items != None:
            # rawContent is a JSON array carried as an escaped string value.
            serialized_str += ', "rawContent": "[' + DynamicSummary.serializeItems(self.summary_items) + ']"'
        return serialized_str

    @staticmethod
    def serializeItems(items):
        """Serialize all summary items into a comma-separated, JSON-escaped list."""
        raw_content = ''
        isFirst = True
        for item in items:
            if isFirst == True:
                isFirst = False
            else:
                raw_content += ','
            # json.dumps escapes the item's inner quotes; strip the outer
            # quotes so the result can be embedded inside the rawContent string.
            raw_content += json.dumps(DynamicSummary.serializeItem(item)).strip('"')
        return raw_content

    @staticmethod
    def serializeItem(item):
        """Serialize a single summary item into a JSON object string."""
        serialized_item_tsr = '{'
        # urn is 'urn:uuid:<guid>'; slice off the 9-char 'urn:uuid:' prefix.
        serialized_item_tsr += '"summaryItemId": "' + item.summary_item_id.urn[9:] + '"'
        if hasattr(item, 'relation_name') and item.relation_name != None:
            serialized_item_tsr += ', "relationName": "' + item.relation_name + '"'
        if hasattr(item, 'relation_id') and item.relation_id != None:
            # BUG FIX: the original statement used a corrupted variable name
            # ('seriserialized_item_tsralized_str'), raising NameError here.
            serialized_item_tsr += ', "relationId" :"' + item.relation_id + '"'
        if hasattr(item, 'search_key') and item.search_key != None:
            serialized_item_tsr += ', "searchKey": "' + item.search_key + '"'
        if hasattr(item, 'tactics') and item.tactics != None:
            serialized_item_tsr += ', "tactics": "' + item.tactics + '"'
        if hasattr(item, 'techniques') and item.techniques != None:
            serialized_item_tsr += ', "techniques": "' + item.techniques + '"'
        if hasattr(item, 'event_time_utc') and item.event_time_utc != None:
            # Expects a naive UTC datetime; 'Z' is appended to mark UTC.
            serialized_item_tsr += ', "eventTimeUTC" :"' + item.event_time_utc.isoformat() + 'Z"'
        if hasattr(item, 'observable_type') and item.observable_type != None:
            serialized_item_tsr += ', "observableType": "' + item.observable_type + '"'
        if hasattr(item, 'observable_value') and item.observable_value != None:
            serialized_item_tsr += ', "observableValue": "' + item.observable_value + '"'
        if hasattr(item, 'packed_content') and item.packed_content != None:
            # packed_content is already a JSON string; embedded verbatim.
            serialized_item_tsr += ', "packedContent": ' + item.packed_content
        serialized_item_tsr += '}'
        return serialized_item_tsr

    def construct_summary(self, tenant_id, summary_name, summary_description, items, \
                          relation_name=None, relation_id=None, search_key=None, tactics=None,
                          techniques=None, source_info=None, **kwargs):
        """ Building summary level data object """
        self.summary_name = summary_name
        self.azure_tenant_id = tenant_id
        self.summary_description = summary_description
        if relation_name != None:
            self.relation_name = relation_name
        if relation_id != None:
            self.relation_id = relation_id
        if search_key != None:
            self.search_key = search_key
        if tactics != None:
            self.tactics = tactics
        if techniques != None:
            self.techniques = techniques
        if source_info != None:
            self.source_info = source_info
        # BUG FIX: the original tested the undefined global 'summary_items'
        # instead of the 'items' parameter.
        if items != None:
            self.summary_items = items

    def construct_summary_item(self, summary_item_id, \
                               relation_name=None, relation_id=None, search_key=None, tactics=None,
                               techniques=None, event_time_utc=None, observable_type=None,
                               observable_value=None, packed_content=None, **kwargs):
        """ Building summary item level data object """
        item = DynamicSummary(self.summary_id)
        item.summary_item_id = summary_item_id
        if relation_name != None:
            item.relation_name = relation_name
        if relation_id != None:
            item.relation_id = relation_id
        if search_key != None:
            item.search_key = search_key
        if tactics != None:
            item.tactics = tactics
        if techniques != None:
            item.techniques = techniques
        if event_time_utc != None:
            item.event_time_utc = event_time_utc
        if observable_type != None:
            item.observable_type = observable_type
        if observable_value != None:
            item.observable_value = observable_value
        if packed_content != None:
            item.packed_content = packed_content
        return item

    @staticmethod
    def construct_arm_rest_url(subscription_id, resource_group, workspace_name, summary_guid):
        "Build URL for Sentinel Dynamic Summaries REST API"
        api_version = "2023-03-01-preview"
        provider_name = "Microsoft.OperationalInsights"
        workspace_provider_name = "Microsoft.SecurityInsights/dynamicSummaries"
        root_url = "https://management.azure.com"
        arm_rest_url_template = "{0}/subscriptions/{1}/resourceGroups/{2}/providers/{3}/workspaces/{4}/providers/{5}/{6}?api-version={7}"
        return arm_rest_url_template.format(root_url, subscription_id, resource_group, provider_name, workspace_name, workspace_provider_name, summary_guid, api_version)

    @staticmethod
    def call_azure_rest_api_for_creating_dynamic_summary(token, arm_rest_url, summary):
        "Calling Microsoft Sentinel REST API"
        # Imported lazily so the rest of the class works without 'requests'.
        import requests
        bearer_token = "Bearer " + token
        headers = {"Authorization": bearer_token, "content-type":"application/json" }
        response = requests.put(arm_rest_url, headers=headers, data=summary, verify=True)
        return response

    @staticmethod
    def display_result(response):
        "Display the result set as pandas.DataFrame"
        # Relies on notebook globals: pd (pandas) and IPython's display().
        if response != None:
            df = pd.DataFrame(response.json()["value"])
            display(df)

2. Azure Authentication

# --- User configuration -------------------------------------------------
# Fill in these values before running the notebook (left empty on purpose).

# Azure AD tenant and subscription hosting the Log Analytics workspace.
tenant_id = ''
subscription_id = ''

# Azure Key Vault holding the service-principal secrets, and the Synapse
# linked-service name used to reach it.
akv_name = ''
akv_link_name = ''

# Log Analytics workspace to scan.
workspace_id = ''

# Key Vault secret names for the service-principal client id / secret.
client_id_name = ''
client_secret_name = ''

# Target for saving results as a Microsoft Sentinel dynamic summary.
resource_group_name_for_dynamic_summaries = ''
sentinel_workspace_name_for_dynamic_summaries = ''
dynamic_summary_name = ''
dynamic_summary_guid = ''
# You may need to change resource_uri for various cloud environments. resource_uri = "https://api.loganalytics.io" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name) client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name) credential = ClientSecretCredential( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret) access_token = credential.get_token(resource_uri + "/.default") token = access_token[0] la_data_client = LogsQueryClient(credential=credential)

3. Azure Log Analytics Data Queries

# Get all tables available using Kusto query language. If you need to know more about KQL, please check out the link provided at the introductory section.
#
# For each table: run one cheap "any column matches" probe with `*`; only if
# that probe returns rows, enumerate the table's columns and re-run the scan
# per column to pinpoint where the credential-like value lives.
#
# Fixes applied to the original cell:
#   * DataFrame.append was removed in pandas 2.0 — replaced with pd.concat
#     (ignore_index=True gives df_total a clean 0..n-1 index).
#   * Typo in the user message: "Not leak found." -> "No leak found."
tables_result = None
table_list = None
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(1)  # scan window for the table probe (1 day)
all_tables_query = "union withsource = SentinelTableName * | distinct SentinelTableName | sort by SentinelTableName asc"
tables_result = la_data_client.query_workspace(
    workspace_id=workspace_id,
    query=all_tables_query,
    timespan=(start_time, end_time))
if tables_result.status == LogsQueryStatus.SUCCESS:
    df_table = pd.DataFrame(data=tables_result.tables[0].rows, columns=tables_result.tables[0].columns)
    table_list = list(df_table["SentinelTableName"])
    column_name = "*"
    df_total = pd.DataFrame()
    df_list = []  # kept for backward compatibility with later cells
    for table_name in table_list:
        print('Table name: ' + table_name)
        column_name = "*"
        kql_where_clause = get_credscan_kql_where_clause(column_name)
        table_query = "{0} {1}".format(table_name, kql_where_clause)
        # Run query
        try:
            try_result = la_data_client.query_workspace(
                workspace_id=workspace_id,
                query=table_query,
                timespan=(start_time, end_time))
            df_try = pd.DataFrame(data=try_result.tables[0].rows, columns=try_result.tables[0].columns)
            if not df_try.empty:
                # The table has at least one hit: enumerate its columns.
                all_columns_query = "let ColumnList = " + table_name + " | getschema | project ColumnName; ColumnList "
                columns_result = la_data_client.query_workspace(
                    workspace_id=workspace_id,
                    query=all_columns_query,
                    timespan=(start_time, end_time))
                df_column = pd.DataFrame(data=columns_result.tables[0].rows, columns=columns_result.tables[0].columns)
                column_list = list(df_column["ColumnName"])
                for column_name in column_list:
                    # Now checking each column
                    if filter_column(column_name):
                        continue
                    kql_where_clause = get_credscan_kql_where_clause(column_name)
                    col_query = "{0} {1} | extend ColumnName='{2}', RegexResult={2} | project ColumnName, RegexResult".format(table_name, kql_where_clause, column_name)
                    # Run query
                    try:
                        single_column_result = la_data_client.query_workspace(
                            workspace_id=workspace_id,
                            query=col_query,
                            timespan=(start_time, end_time))
                        # process result
                        df_single_col = pd.DataFrame(data=single_column_result.tables[0].rows, columns=single_column_result.tables[0].columns)
                        if not df_single_col.empty:
                            print('Column name: ' + column_name)
                            # FIX: DataFrame.append was removed in pandas 2.0.
                            df_total = pd.concat([df_total, df_single_col], ignore_index=True)
                    except Exception as ex:
                        print("=============Exception========")
                        print(ex)
                        print("==============================")
            else:
                print("No leak found.")
        except HttpResponseError as error:
            print("==============================")
            print(" This table got http error:")
            print(" message:" + error.message)
            print(" reason:" + error.reason)
            print("==============================")
    if not df_total.empty:
        print('results:')
        pd.options.display.max_columns = None
        display(df_total)
    else:
        print('--- No leak ---')

4. Save result to Microsoft Sentinel Dynamic Summaries

# Build the dynamic summary payload from the scan results (only when results
# exist and a dynamic summary name was configured).
if not df_total.empty and dynamic_summary_name != None and dynamic_summary_name != '':
    summary = DynamicSummary(dynamic_summary_guid)
    summary_description = "This summary is generated from notebook - AutomationGallery-CredentialScanOnAzureLogAnalytics."
    summary_items = []
    for index, row in df_total.iterrows():
        # FIX: the original used df_total.iloc[index], treating the row LABEL
        # as a POSITION — wrong (or raising) once df_total was built by
        # concatenating frames with duplicate indices. `row` is already the
        # record we want.
        packed_content = row.to_json()
        # Positional Nones fill relation_name/relation_id/search_key/tactics/
        # techniques; utcnow() stays naive so serializeItem's appended 'Z' is
        # the only UTC marker.
        summary_items.append(summary.construct_summary_item(DynamicSummary.get_new_guid(),
                                                            None, None, None, None, None,
                                                            datetime.utcnow(), None, None,
                                                            packed_content))
    summary.construct_summary(tenant_id, dynamic_summary_name, summary_description, summary_items)
    summary_json = "{ \"properties\": {" + summary.serialize() + "}}"
    print(summary_json)
# Push the serialized summary to Microsoft Sentinel via the Dynamic
# Summaries REST API (same guard as the previous cell: results must exist
# and a dynamic summary name must be configured).
if not df_total.empty and dynamic_summary_name != None and dynamic_summary_name != '':
    # ARM resource URL for the dynamic summary identified by its GUID.
    dyn_sum_api_url = DynamicSummary.construct_arm_rest_url(subscription_id, resource_group_name_for_dynamic_summaries, sentinel_workspace_name_for_dynamic_summaries, dynamic_summary_guid)
    # PUT creates or replaces the dynamic summary; 'token' and 'summary_json'
    # come from the earlier authentication and serialization cells.
    response = DynamicSummary.call_azure_rest_api_for_creating_dynamic_summary(token, dyn_sum_api_url, summary_json)
    # 200/201 indicates success.
    print(response.status_code)