Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Azure
GitHub Repository: Azure/Azure-Sentinel-Notebooks
Path: blob/master/tutorials-and-examples/how-tos/Automation Setup - Configure Azure Machine Learning Pipelines.ipynb
3253 views
Kernel: Python 3.8 - AzureML

Automation Setup - Configure Azure Machine Learning Pipelines

Notebook Version: 1.0
Python Version: Python 3.8 - AzureML
Required Packages: No
Platforms Supported: Azure Machine Learning Notebooks

Data Source Required: No

Description

This is the second notebook in a series for setting up the Microsoft Sentinel notebook automation platform based on Azure Machine Learning Pipelines.
Before starting this notebook, you should have a notebook to be executed automatically ready.
This notebook provides step-by-step instructions to create Azure Machine Learning Pipeline, publish it, and schedule to run the pipeline to execute the targeted notebook.

*** Please run the cells sequentially to avoid errors. Please do not use "run all cells". ***

Table of Contents

  1. Warm-up

  2. Authentication to Azure Resources

  3. Azure Machine Learning Pipeline

1. Warm-up

# Azure Machine Learning and Pipeline SDK-specific imports # azureml import azureml.core from azureml.core import Workspace, Experiment from azureml.core.datastore import Datastore from azureml.core.runconfig import RunConfiguration from azureml.core.conda_dependencies import CondaDependencies from azureml.contrib.notebook import NotebookRunConfig, AzureMLNotebookHandler from azureml.pipeline.core import Pipeline from azureml.pipeline.core import PipelineData from azureml.contrib.notebook import NotebookRunnerStep from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule # azure common/core from azure.common.credentials import get_azure_cli_credentials from azure.mgmt.resource import ResourceManagementClient # Python/ipython import json from datetime import datetime from IPython.display import display, HTML, Markdown # Check core SDK version number print("SDK version:", azureml.core.VERSION)
# Functions will be used in this notebook def read_config_values(file_path): "This loads pre-generated parameters for Microsoft Sentinel Workspace" with open(file_path) as json_file: if json_file: json_config = json.load(json_file) return (json_config["tenant_id"], json_config["subscription_id"], json_config["resource_group"], json_config["workspace_id"], json_config["workspace_name"], json_config["user_alias"], json_config["user_object_id"]) return None def has_valid_token(): "Check to see if there is a valid AAD token" try: credentials, sub_id = get_azure_cli_credentials() creds = credentials._get_cred(resource=None) token = creds._token_retriever()[2] print("Successfully signed in.") return True except Exception as ex: if "Please run 'az login' to setup account" in str(ex): print("Please sign in first.") return False elif "AADSTS70043: The refresh token has expired" in str(ex): message = "**The refresh token has expired. <br> Please continue your login process. Then: <br> 1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart'; <br> 2. Otherwise, you may just restart the kernel from top menu. <br> Finally, close and re-load the notebook, then re-run cells one by one from the top.**" display(Markdown(message)) return False elif "[Errno 2] No such file or directory: '/home/azureuser/.azure/azureProfile.json'" in str(ex): print("Please sign in.") return False else: print(str(ex)) return False except: print("Please restart the kernel, and run 'az login'.") return False
# Calling the above function to populate Microsoft Sentinel workspace parameters # The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json'); print("Subscription Id: " + subscription_id)

2. Authentication to Azure Resources

# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site. # You may add [--tenant $tenant_id] to the command if has_valid_token() == False: !echo -e '\e[42m' !az login --tenant $tenant_id --use-device-code

3. Azure Machine Learning Pipeline

# 1. Enter resource names # Enter name of an Azure resource group resource_group = 'myresourcegroup' # Enter current AML workspace name current_aml_workspace_name = 'auto2022' # Enter compute cluster name amlcompute_cluster_name = 'compcl2022'
# 2. Get AML workspace ws = Workspace.get(name=current_aml_workspace_name, subscription_id=subscription_id, resource_group=resource_group) print(ws) ws.set_default_datastore("workspaceblobstore") datastore = Datastore.get(ws, "workspaceblobstore")
# 3. Create a new RunConfig object source_directory = '' notebook_name = 'Automation Gallery - Credential Scan on Azure Blob Storage.ipynb' output_notebook_name = 'blob_scan_results.ipynb' conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE print('conda-run config is ready') # Create notebook run configuration and set parameters values handler = AzureMLNotebookHandler(timeout=600, progress_bar=False, log_output=True) cfg = NotebookRunConfig(source_directory=source_directory, notebook=notebook_name, handler = handler, parameters={}, run_config=conda_run_config, output_notebook=output_notebook_name) print("Notebook Run Config is created.")
# 4. Define NotebookRunnerStep #my_pipeline_param = PipelineParameter(name="my_pipeline_param", default_value=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) output_name = "notebookresult_2022" output_from_notebook = PipelineData(name="notebook_processed_data", datastore=Datastore.get(ws, "workspaceblobstore"),output_overwrite=True, output_mode="upload") notebook_runner_step = NotebookRunnerStep(name="sentinel_notebook_step", notebook_run_config=cfg, params = {}, # params={"my_pipeline_param": my_pipeline_param}, inputs=[], outputs=[], allow_reuse=False, compute_target=amlcompute_cluster_name, output_notebook_pipeline_data_name=output_name) print("Notebook Runner Step is Created.")
# 5. Build Pipeline and publish it pipeline4sentinel = Pipeline(workspace=ws, steps=[notebook_runner_step]) print("Pipeline creation complete") # Publish the pipeline timenow = datetime.now().strftime('%Y-%m-%d-%H-%M') pipeline_name = "Sentinel-Pipeline-" + timenow published_sentinel_pipeline = pipeline4sentinel.publish( name=pipeline_name, description=pipeline_name) print("Newly published pipeline id: {}".format(published_sentinel_pipeline.id)) print("Endpoint: {}".format(published_sentinel_pipeline.endpoint))
# 6. Create a schedule for the published pipeline using a recurrence schedule_name = 'sentinel_schedule' experiment_name = 'sentinel_experiment_2022' recurrence = ScheduleRecurrence(frequency="Day", interval=1, hours=[22], minutes=[30]) # Runs every other day at 10:30pm #recurrence = ScheduleRecurrence(frequency="Hour", interval=8) # Runs every two hours schedule = Schedule.create(workspace=ws, name=schedule_name, pipeline_id=published_sentinel_pipeline.id, experiment_name=experiment_name, recurrence=recurrence, wait_for_provisioning=True, description="Schedule to run Sentinel notebook") print("Created schedule with id: {}".format(schedule.id))