Path: blob/master/scenario-notebooks/Tools/PerfTools_Log Analytics_CustomTable_Setup.ipynb
3253 views
Kernel: Synapse PySpark
Performance Tools - Log Analytics Custom Table Setup
Notebook Version: 1.0
Python Version: Python 3.8
Apache Spark Version: 3.1
Required Packages: azure-monitor-query, azure-mgmt-resource
Platforms Supported: Azure Synapse Analytics
Data Source Required: No
Description
This notebook creates a data collection endpoint, a custom table, and a data collection rule for Azure Log Analytics.
*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***
Table of Contents
Warm-up
Azure Authentication
Create Data Collection Endpoint (DCE)
Create Custom Table
Create Data Collection Rule (DCR)
1. Warm-up
In [ ]:
# Install the Azure Monitor Query SDK on the Spark pool for this session.
# NOTE(review): later cells also import azure.identity, azure.core, and
# azure.mgmt.resource — confirm those packages are preinstalled on the
# Synapse Spark pool, or add them to this install line.
%pip install azure.monitor.query
In [ ]:
# Standard library
import json
from datetime import datetime, timezone, timedelta

# Azure SDK
from azure.core.exceptions import HttpResponseError
from azure.identity import AzureCliCredential, ClientSecretCredential, DefaultAzureCredential

# Notebook display helpers
from IPython.display import HTML, Markdown, display
In [ ]:
# ---- User inputs, section 1: tenant / subscription / workspace ids ----
tenant_id = ""
subscription_id = ""
workspace_id = ""

# Azure Key Vault entries holding the service principal credentials.
akv_name = ""
client_id_name = ""
client_secret_name = ""
akv_link_name = ""

# ---- User inputs, section 2: parameters for provisioning resources ----
resource_group_name = ""
location = ""
workspace_name = ""
workspace_resource_id = (
    f"/subscriptions/{subscription_id}"
    f"/resourceGroups/{resource_group_name}"
    f"/providers/Microsoft.OperationalInsights/workspaces/{workspace_name}"
)
data_collection_endpoint_name = ""
data_collection_rule_name = ""
custom_table_name = ""
# Streams for ingestion are addressed as "Custom-<table name>".
custom_table_full_name = f"Custom-{custom_table_name}"
2. Azure Authentication
In [ ]:
# You may need to change resource_uri for various cloud environments. resource_uri = "https://api.loganalytics.io" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name) client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name) credential = ClientSecretCredential( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret) access_token = credential.get_token(resource_uri + "/.default") token = access_token[0]
3. Create Data Collection Endpoint (DCE)
In [ ]:
# ARM deployment template (kept as a JSON string, parsed with json.loads in the
# next cell) that provisions a Microsoft.Insights/dataCollectionEndpoints
# resource with public network access enabled, and outputs both its resource id
# and its logs-ingestion endpoint object.
dce_json_string = """ { "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", "contentVersion": "1.0.0.0", "parameters": { "dataCollectionEndpointName": { "type": "string", "metadata": { "description": "Specifies the name of the Data Collection Endpoint to create." } }, "location": { "type": "string", "defaultValue": "eastus", "metadata": { "description": "Specifies the location for the Data Collection Endpoint." } } }, "resources": [ { "type": "Microsoft.Insights/dataCollectionEndpoints", "name": "[parameters('dataCollectionEndpointName')]", "location": "[parameters('location')]", "apiVersion": "2021-04-01", "properties": { "networkAcls": { "publicNetworkAccess": "Enabled" } } } ], "outputs": { "dataCollectionEndpointId": { "type": "string", "value": "[resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))]" }, "endpoint": { "type": "object", "value": "[reference(resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))).logsIngestion]" } } } """
In [ ]:
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.resource.resources.models import DeploymentMode

# Kick off an incremental ARM deployment of the DCE template into the
# target resource group; the next cell polls the returned LRO for status.
resource_client = ResourceManagementClient(credential, subscription_id)
template_body = json.loads(dce_json_string)

dce_deployment_spec = {
    "properties": {
        "template": template_body,
        "parameters": {
            "location": {"value": location},
            "dataCollectionEndpointName": {"value": data_collection_endpoint_name},
        },
        "mode": DeploymentMode.incremental,
    }
}

rg_deployment_result = resource_client.deployments.begin_create_or_update(
    resource_group_name,
    "exampleDeployment",
    dce_deployment_spec,
)
In [ ]:
# Poll the DCE deployment; rerun this cell until it prints Succeeded
# (or stop if it reports Failed).
# Bug fix: the original initialized `dce_res_id` but assigned
# `dce_resource_id` on success, leaving the initialized name unused.
dce_resource_id = ''
dce_endpoint = ''
if rg_deployment_result.status() != "Succeeded":
    print(rg_deployment_result.status())
    print('Run the cell until status=Succeeded or when you see Failed.')
else:
    outputs = rg_deployment_result.result().properties.outputs
    dce_resource_id = outputs["dataCollectionEndpointId"].get("value")
    # The "endpoint" output is the logsIngestion object; its 'endpoint' key
    # holds the ingestion URL.
    dce_endpoint = outputs["endpoint"].get("value")['endpoint']
    print('You will need DCE Endpoint for future data ingestion!')
    print('DCE Endpoint: ' + dce_endpoint)
4. Create Custom Table
In [ ]:
# Please replace columns info with your own columns cus_table_json_string = """ { "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", "contentVersion": "1.0.0.0", "parameters": { "workspaceName": { "type": "string", "metadata": { "description": "LA workspace name." } }, "customTableName": { "type": "string", "metadata": { "description": "table name." } } }, "resources": [ { "type": "Microsoft.OperationalInsights/workspaces/tables", "apiVersion": "2021-12-01-preview", "name": "[concat(parameters('workspaceName'), '/', parameters('customTableName'))]", "kind": "CustomLog", "properties": { "totalRetentionInDays": 90, "plan": "Analytics", "schema": { "name": "[parameters('customTableName')]", "columns": [ { "name": "TimeGenerated", "type": "datetime" }, { "name": "TimeInSeconds", "type": "real" }, { "name": "QueryBody", "type": "string" } ] }, "retentionInDays": 90 } } ], "outputs": { "streamName": { "type": "string", "value": "[concat('Custom-', parameters('customTableName'))]" } } } """
In [ ]:
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.resource.resources.models import DeploymentMode

# Deploy the custom-table template; the next cell polls the LRO for status.
resource_client = ResourceManagementClient(credential, subscription_id)
template_body = json.loads(cus_table_json_string)
table_tag = "defaultct"

table_deployment_spec = {
    "properties": {
        "template": template_body,
        "parameters": {
            "workspaceName": {"value": workspace_name},
            "customTableName": {"value": custom_table_name},
        },
        "mode": DeploymentMode.incremental,
    }
}

rg_deployment_result = resource_client.deployments.begin_create_or_update(
    resource_group_name,
    table_tag,
    table_deployment_spec,
)
In [ ]:
# Poll the custom-table deployment; rerun this cell until it prints Succeeded
# (or stop if it reports Failed). Fixes the "stauts" typo in the message.
stream_name = ''
if rg_deployment_result.status() != "Succeeded":
    print(rg_deployment_result.status())
    print('Run the cell until status=Succeeded or when you see Failed.')
else:
    stream_name = rg_deployment_result.result().properties.outputs["streamName"].get("value")
    print('You will need full stream name for future data ingestion!')
    print('Stream Name: ' + stream_name)
5. Create Data Collection Rule (DCR)
In [ ]:
# ARM deployment template (JSON string) that creates a data collection rule
# bound to the DCE, declares the custom stream with the same three-column
# schema as the table, and routes it to the Log Analytics workspace with a
# pass-through ("source") KQL transform. Outputs the DCR resource id and its
# immutableId (needed by the Logs Ingestion API).
# Please replace columns info with your own columns
dcr_json_string = """ { "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", "contentVersion": "1.0.0.0", "parameters": { "dataCollectionRuleName": { "type": "string", "metadata": { "description": "Specifies the name of the Data Collection Rule to create." } }, "location": { "type": "string", "metadata": { "description": "Specifies the location in which to create the Data Collection Rule." } }, "workspaceResourceId": { "type": "string", "metadata": { "description": "Specifies the Azure resource ID of the Log Analytics workspace to use." } }, "workspaceName": { "type": "string", "metadata": { "description": "LA workspace name." } }, "endpointResourceId": { "type": "string", "metadata": { "description": "Specifies the Azure resource ID of the Data Collection Endpoint to use." } }, "customTableFullName": { "type": "string", "metadata": { "description": "table name." } } }, "resources": [ { "type": "Microsoft.Insights/dataCollectionRules", "name": "[parameters('dataCollectionRuleName')]", "location": "[parameters('location')]", "apiVersion": "2021-09-01-preview", "properties": { "dataCollectionEndpointId": "[parameters('endpointResourceId')]", "streamDeclarations": { "[parameters('customTableFullName')]": { "columns": [ { "name": "TimeGenerated", "type": "datetime" }, { "name": "TimeInSeconds", "type": "real" }, { "name": "QueryBody", "type": "string" } ] } }, "destinations": { "logAnalytics": [ { "workspaceResourceId": "[parameters('workspaceResourceId')]", "name": "[parameters('workspaceName')]" } ] }, "dataFlows": [ { "streams": [ "[parameters('customTableFullName')]" ], "destinations": [ "[parameters('workspaceName')]" ], "transformKql": "source", "outputStream": "[parameters('customTableFullName')]" } ] } } ], "outputs": { "dataCollectionRuleId": { "type": "string", "value": "[resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))]" }, 
"immutableId": { "type": "string", "value": "[reference(resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))).immutableId]" } } } """
In [ ]:
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.resource.resources.models import DeploymentMode

# Deploy the DCR template, wiring together the DCE, the workspace, and the
# custom stream name; the next cell polls the LRO for status.
resource_client = ResourceManagementClient(credential, subscription_id)
template_body = json.loads(dcr_json_string)

# Rebuild the DCE resource id from its parts so this cell does not depend on
# the earlier deployment-output cell having run in this session.
dce_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Insights/dataCollectionEndpoints/{2}'.format(subscription_id, resource_group_name, data_collection_endpoint_name)
endpoint_resource_id = dce_resource_id
tag_name = "defaultdcr"

dcr_parameters = {
    "location": {"value": location},
    "dataCollectionRuleName": {"value": data_collection_rule_name},
    "workspaceResourceId": {"value": workspace_resource_id},
    "workspaceName": {"value": workspace_name},
    "endpointResourceId": {"value": endpoint_resource_id},
    "customTableFullName": {"value": custom_table_full_name},
}

rg_deployment_result = resource_client.deployments.begin_create_or_update(
    resource_group_name,
    tag_name,
    {
        "properties": {
            "template": template_body,
            "parameters": dcr_parameters,
            "mode": DeploymentMode.incremental,
        }
    },
)
In [ ]:
# Poll the DCR deployment; rerun this cell until it prints Succeeded
# (or stop if it reports Failed). Fixes the "stauts" typo in the message.
immutable_id = ''
if rg_deployment_result.status() != "Succeeded":
    print(rg_deployment_result.status())
    print('Run the cell until status=Succeeded or when you see Failed.')
else:
    immutable_id = rg_deployment_result.result().properties.outputs["immutableId"].get("value")
    print('You will need DCR Immutable Id for future data ingestion!')
    print('DCR Immutable Id: ' + immutable_id)
Once everything is finished, make sure to grant the Entra ID service principal the Monitoring Metrics Publisher role on the new DCR.