def query_la(workspace_id_query, query):
    """Execute ``query`` through ``LogsQueryClient`` and report how long it took.

    The query runs over the seven days ending at the moment of the call.
    The response returned by ``query_workspace`` is discarded; only the
    wall-clock duration of the call is reported.

    Relies on the module-level ``credential`` to build the client.

    Args:
        workspace_id_query: Workspace ID forwarded to ``query_workspace``.
        query: Query text to run; it is echoed to stdout before execution.

    Returns:
        Seconds elapsed between the UTC timestamp captured before the query
        is sent and the one captured after the call returns.
    """
    # The client is built before the first timestamp is taken, so its
    # construction is not part of the measured duration.
    client = LogsQueryClient(credential=credential)

    window_end = datetime.now(timezone.utc)
    window_start = window_end - timedelta(days=7)

    print(query)
    print(f"starting {window_end.timestamp()}")
    client.query_workspace(
        workspace_id=workspace_id_query,
        query=query,
        timespan=(window_start, window_end),
    )
    finished_at = datetime.now(timezone.utc)
    print(f"ending {finished_at.timestamp()}")

    elapsed = finished_at - window_end
    return elapsed.total_seconds()
def slice_query_la(query, lookback_start, lookback_end='0', lookback_unit='h', query_row_limit=400000, split_factor=2):
    """Run a templated query over a lookback window, slicing it when too large.

    ``query`` is a ``str.format`` template that receives three positional
    values: ``{0}`` the lookback start, ``{1}`` the time unit and ``{2}`` the
    lookback end. The row count of the whole window is fetched first (by
    appending ``| summarize count()``). If it exceeds ``query_row_limit`` the
    window is divided into equal time slices, each slice is queried
    separately, and the results are concatenated.

    Uses the module-level ``query_la`` and ``workspace_id_source``.
    NOTE(review): this function indexes the result of ``query_la`` as a
    DataFrame (``['count_'][0]``, ``.shape``) - confirm the ``query_la``
    bound at call time returns one.

    Args:
        query: Format template for the query (see above).
        lookback_start: Lookback value for the far edge of the window;
            must be convertible with ``int()`` when slicing is needed.
        lookback_end: Lookback value for the near edge of the window.
        lookback_unit: Unit suffix substituted into the template. ``'h'`` is
            switched to ``'m'`` when there are more slices than hours.
        query_row_limit: Maximum row count allowed for a single query.
        split_factor: The slice count is multiplied by this factor until the
            estimated rows per slice fit within ``query_row_limit``.

    Returns:
        The result of the single query, or the concatenation of all slice
        results (an empty DataFrame if the window yields no slices).

    Raises:
        ValueError: If slicing is required and ``split_factor`` is not
            greater than 1 (the slice count could never grow).
    """
    window_query = query.format(lookback_start, lookback_unit, lookback_end)
    count_query = window_query + ' | summarize count()'
    df_count = query_la(workspace_id_source, count_query)
    row_count = df_count['count_'][0]
    print(count_query)
    print(row_count)

    needs_split = row_count > query_row_limit
    if not needs_split:
        return query_la(workspace_id_source, window_query)

    if split_factor <= 1:
        raise ValueError("split_factor must be greater than 1 to split the window")

    # Multiply the slice count until the estimated rows per slice fit the limit.
    factor = 1
    rows_per_slice = row_count
    while rows_per_slice > query_row_limit:
        rows_per_slice = rows_per_slice / split_factor
        factor = factor * split_factor

    frames = []
    try:
        for far_edge, unit, near_edge in _lookback_slices(lookback_start, lookback_end, lookback_unit, factor):
            current_query = query.format(far_edge, unit, near_edge)
            print(current_query)
            df_la_query = query_la(workspace_id_source, current_query)
            print(df_la_query.shape[0])
            frames.append(df_la_query)
    except Exception:
        print("query failed")
        raise

    # Concatenate once at the end instead of re-copying the frame per slice.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)


def _lookback_slices(lookback_start, lookback_end, lookback_unit, factor):
    """Split the window [lookback_end, lookback_start] into at most ``factor`` slices.

    Returns a list of ``(far_edge, unit, near_edge)`` tuples ordered from the
    near edge outward. Slices are contiguous, never extend past
    ``lookback_start`` and never start before ``lookback_end``.
    """
    near = int(lookback_end)
    far = int(lookback_start)

    # With more slices than hours an hourly step would be below 1, so switch
    # to minute resolution.
    if lookback_unit == 'h' and factor > far - near:
        lookback_unit = 'm'
        near *= 60
        far *= 60

    span = far - near
    if span <= 0:
        return []

    step = math.ceil(span / factor)
    slices = []
    lower = near
    while lower < far:
        # Clamp the last slice so rounding the step up cannot overshoot.
        upper = min(lower + step, far)
        slices.append((upper, lookback_unit, lower))
        lower = upper
    return slices