Performance Tools - Azure Log Analytics Query

Notebook Version: 1.0
Python Version: Python 3.8
Apache Spark Version: 3.1
Required Packages: azure-monitor-query, azure-mgmt-loganalytics
Platforms Supported: Azure Synapse Analytics

Data Source Required: Log Analytics custom table defined

Description

This notebook measures query performance against Azure Log Analytics using its data query API.
*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***
Need to know more about KQL? Getting started with Kusto Query Language.

Warm-up
Azure Log Analytics Data Queries
Save result to ADX

1. Warm-up

In [ ]:

# Load Python libraries that will be used in this notebook
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.kusto import KustoManagementClient
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties

from azure.kusto.ingest import (
    BlobDescriptor,
    FileDescriptor,
    IngestionProperties,
    IngestionStatus,
    KustoStreamingIngestClient,
    ManagedStreamingIngestClient,
    QueuedIngestClient,
    StreamDescriptor,
)

from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential
from azure.core.exceptions import  HttpResponseError 
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus

import sys
from datetime import datetime, timezone, timedelta
import requests
import pandas as pd
import numpy
import json
import math
import ipywidgets
from IPython.display import display, HTML, Markdown

In [ ]:

tenant_id = ""
subscription_id = ""

akv_name = ""
client_id_name = ""
client_secret_name = ""
akv_link_name = ""

In [ ]:

# User input for Log Analytics workspace as the data source for querying
subscription_id_source = ""
resource_group_name_source = ""
workspace_name_source = ""
workspace_id_source = ""
workspace_resource_id_source = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}".format(subscription_id_source, resource_group_name_source, workspace_name_source)

In [ ]:

# You may need to change resource_uri for various cloud environments.
resource_uri = "https://api.loganalytics.io"
client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)
client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)

credential = ClientSecretCredential(
    tenant_id=tenant_id, 
    client_id=client_id, 
    client_secret=client_secret)
access_token = credential.get_token(resource_uri + "/.default")
token = access_token[0]

2. Azure Log Analytics Data Queries

In [ ]:

# Functions for query
def query_la(workspace_id_query, query):
    la_data_client = LogsQueryClient(credential=credential)
    end_time =  datetime.now(timezone.utc)
    start_time = end_time - timedelta(7)

    print(query)
    print('starting ' + str(end_time.timestamp()))
    la_data_client.query_workspace(
        workspace_id=workspace_id_query,
        query=query,
        timespan=(start_time, end_time))
    final_time =  datetime.now(timezone.utc)
    print('ending ' + str(final_time.timestamp()))
    return (final_time - end_time).total_seconds()


def slice_query_la(query, lookback_start, lookback_end='0', lookback_unit='h', query_row_limit=400000, split_factor=2):
    "Slice the time to render records <= 500K"
    count_query = query.format(lookback_start, lookback_unit, lookback_end)
    count = ' | summarize count()'
    count_query = count_query + count
    df_count = query_la(workspace_id_source, count_query)
    row_count = df_count['count_'][0]
    print(count_query)
    print(row_count)
    df_final = pd.DataFrame()

    if row_count > query_row_limit:
        number_of_divide = 0
        while row_count > query_row_limit:
            row_count = row_count / split_factor
            number_of_divide = number_of_divide + 1

        factor = split_factor ** number_of_divide
        step_number = math.ceil(int(lookback_start) / factor)
        if factor > int(lookback_start) and lookback_unit == 'h':
            lookback_unit = 'm'
            number_of_minutes = 60
            step_number = math.ceil(int(lookback_start)*number_of_minutes / factor)

        try:
            for i in range(int(lookback_end), factor + 1, 1):
                if i > 0:
                    df_la_query = pd.DataFrame
                    current_query = query.format(i * step_number, lookback_unit, (i - 1) * step_number)
                    print(current_query)
                    df_la_query = query_la(workspace_id_source, current_query)
                    print(df_la_query.shape[0])
                    df_final = pd.concat([df_final, df_la_query])
        except:
            print("query failed")
            raise
    else:
        df_final = query_la(workspace_id_source, query.format(lookback_start, lookback_unit, lookback_end))

    return df_final

Simple Data Query

In [ ]:

lookback_period = '24'
lookback_unit = 'h'
number_of_item = '1000'
query = "CommonSecurityLog | where TimeGenerated >= ago({0}{1}) | take {2}".format(lookback_period, lookback_unit, number_of_item)

execution_seconds = query_la(workspace_id_source, query)

3. Save result to ADX

In [ ]:

cluster = "https://ingest-nbauto.eastus.kusto.windows.net"
kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster, client_id, client_secret, tenant_id)
kusto_client = QueuedIngestClient(kcsb)

ingestion_props = IngestionProperties(
    database="nba1",
    table="usxperf",
)

fields = ["TimeGenerated", "Service", "Query", "TimeInSeconds"]
rows = [[datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), "Log Analytics Draft", query, execution_seconds]]
df = pd.DataFrame(data=rows, columns=fields)
kusto_client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)

Performance Tools - Azure Log Analytics Query

Description

Table of Contents

1. Warm-up

2. Azure Log Analytics Data Queries

Simple Data Query

3. Save result to ADX

Product

Resources

Company