Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Azure
GitHub Repository: Azure/Azure-Sentinel-Notebooks
Path: blob/master/scenario-notebooks/Tools/PerfTools_Log Analytics Query.ipynb
3253 views
Kernel: Synapse PySpark

Performance Tools - Azure Log Analytics Query

Notebook Version: 1.0
Python Version: Python 3.8
Apache Spark Version: 3.1
Required Packages: azure-monitor-query, azure-mgmt-loganalytics
Platforms Supported: Azure Synapse Analytics

Data Source Required: Log Analytics custom table defined

Description

This notebook measures query performance against Azure Log Analytics using its data query API.
*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***
Need to know more about KQL? Getting started with Kusto Query Language.

Table of Contents

  1. Warm-up

  2. Azure Log Analytics Data Queries

  3. Save result to ADX

1. Warm-up

# Load Python libraries that will be used in this notebook from azure.mgmt.resource import ResourceManagementClient from azure.mgmt.kusto import KustoManagementClient from azure.kusto.data.exceptions import KustoServiceError from azure.kusto.data.helpers import dataframe_from_result_table from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties from azure.kusto.ingest import ( BlobDescriptor, FileDescriptor, IngestionProperties, IngestionStatus, KustoStreamingIngestClient, ManagedStreamingIngestClient, QueuedIngestClient, StreamDescriptor, ) from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential from azure.core.exceptions import HttpResponseError from azure.mgmt.loganalytics import LogAnalyticsManagementClient from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus import sys from datetime import datetime, timezone, timedelta import requests import pandas as pd import numpy import json import math import ipywidgets from IPython.display import display, HTML, Markdown
tenant_id = "" subscription_id = "" akv_name = "" client_id_name = "" client_secret_name = "" akv_link_name = ""
# User input for Log Analytics workspace as the data source for querying subscription_id_source = "" resource_group_name_source = "" workspace_name_source = "" workspace_id_source = "" workspace_resource_id_source = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}".format(subscription_id_source, resource_group_name_source, workspace_name_source)
# You may need to change resource_uri for various cloud environments. resource_uri = "https://api.loganalytics.io" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name) client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name) credential = ClientSecretCredential( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret) access_token = credential.get_token(resource_uri + "/.default") token = access_token[0]

2. Azure Log Analytics Data Queries

# Functions for query def query_la(workspace_id_query, query): la_data_client = LogsQueryClient(credential=credential) end_time = datetime.now(timezone.utc) start_time = end_time - timedelta(7) print(query) print('starting ' + str(end_time.timestamp())) la_data_client.query_workspace( workspace_id=workspace_id_query, query=query, timespan=(start_time, end_time)) final_time = datetime.now(timezone.utc) print('ending ' + str(final_time.timestamp())) return (final_time - end_time).total_seconds() def slice_query_la(query, lookback_start, lookback_end='0', lookback_unit='h', query_row_limit=400000, split_factor=2): "Slice the time to render records <= 500K" count_query = query.format(lookback_start, lookback_unit, lookback_end) count = ' | summarize count()' count_query = count_query + count df_count = query_la(workspace_id_source, count_query) row_count = df_count['count_'][0] print(count_query) print(row_count) df_final = pd.DataFrame() if row_count > query_row_limit: number_of_divide = 0 while row_count > query_row_limit: row_count = row_count / split_factor number_of_divide = number_of_divide + 1 factor = split_factor ** number_of_divide step_number = math.ceil(int(lookback_start) / factor) if factor > int(lookback_start) and lookback_unit == 'h': lookback_unit = 'm' number_of_minutes = 60 step_number = math.ceil(int(lookback_start)*number_of_minutes / factor) try: for i in range(int(lookback_end), factor + 1, 1): if i > 0: df_la_query = pd.DataFrame current_query = query.format(i * step_number, lookback_unit, (i - 1) * step_number) print(current_query) df_la_query = query_la(workspace_id_source, current_query) print(df_la_query.shape[0]) df_final = pd.concat([df_final, df_la_query]) except: print("query failed") raise else: df_final = query_la(workspace_id_source, query.format(lookback_start, lookback_unit, lookback_end)) return df_final

Simple Data Query

lookback_period = '24' lookback_unit = 'h' number_of_item = '1000' query = "CommonSecurityLog | where TimeGenerated >= ago({0}{1}) | take {2}".format(lookback_period, lookback_unit, number_of_item) execution_seconds = query_la(workspace_id_source, query)

3. Save result to ADX

cluster = "https://ingest-nbauto.eastus.kusto.windows.net" kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster, client_id, client_secret, tenant_id) kusto_client = QueuedIngestClient(kcsb) ingestion_props = IngestionProperties( database="nba1", table="usxperf", ) fields = ["TimeGenerated", "Service", "Query", "TimeInSeconds"] rows = [[datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), "Log Analytics Draft", query, execution_seconds]] df = pd.DataFrame(data=rows, columns=fields) kusto_client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)