Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/metrics/src/sys/windows/system_metrics.rs
5394 views
1
// Copyright 2022 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::fmt;
6
use std::fmt::Write as _;
7
use std::mem;
8
use std::sync::Arc;
9
use std::sync::Mutex;
10
use std::sync::Weak;
11
use std::thread;
12
use std::thread::JoinHandle;
13
use std::time::Duration;
14
use std::time::Instant;
15
16
use base::error;
17
use base::AsRawDescriptor;
18
use base::Error as SysError;
19
use base::Event;
20
use base::EventToken;
21
use base::FromRawDescriptor;
22
use base::SafeDescriptor;
23
use base::WaitContext;
24
use chrono::DateTime;
25
use chrono::Utc;
26
use winapi::shared::minwindef::DWORD;
27
use winapi::shared::minwindef::FILETIME;
28
use winapi::um::processthreadsapi::GetProcessTimes;
29
use winapi::um::processthreadsapi::GetSystemTimes;
30
use winapi::um::processthreadsapi::OpenProcess;
31
use winapi::um::psapi::GetProcessMemoryInfo;
32
use winapi::um::psapi::PROCESS_MEMORY_COUNTERS;
33
use winapi::um::psapi::PROCESS_MEMORY_COUNTERS_EX;
34
use winapi::um::winbase::GetProcessIoCounters;
35
use winapi::um::winnt::IO_COUNTERS;
36
use winapi::um::winnt::LARGE_INTEGER;
37
use winapi::um::winnt::LONGLONG;
38
use winapi::um::winnt::PROCESS_QUERY_LIMITED_INFORMATION;
39
use winapi::um::winnt::PROCESS_VM_READ;
40
use winapi::um::winnt::SYNCHRONIZE;
41
42
use crate::log_metric;
43
use crate::sys::windows::Error;
44
use crate::sys::windows::Result;
45
use crate::sys::windows::METRICS_UPLOAD_INTERVAL;
46
use crate::MetricEventType;
47
48
const BYTES_PER_MB: usize = 1024 * 1024;
49
const WORKER_REPORT_INTERVAL: Duration = Duration::from_secs(1);
50
51
type SysResult<T> = std::result::Result<T, SysError>;
52
53
/// A worker job which periodically logs system metrics.
54
struct Worker {
55
exit_evt: Event,
56
io: Arc<Mutex<Option<ProcessIoRecord>>>,
57
measurements: Arc<Mutex<Option<Measurements>>>,
58
memory: Arc<Mutex<ProcessMemory>>,
59
memory_acc: Arc<Mutex<Option<ProcessMemoryAccumulated>>>,
60
metrics_string: Arc<Mutex<String>>,
61
}
62
63
impl Worker {
64
fn run(&mut self) {
65
#[derive(EventToken)]
66
enum Token {
67
Exit,
68
}
69
let event_ctx: WaitContext<Token> =
70
match WaitContext::build_with(&[(&self.exit_evt, Token::Exit)]) {
71
Ok(event_ctx) => event_ctx,
72
Err(e) => {
73
error!("failed creating WaitContext: {}", e);
74
return;
75
}
76
};
77
let mut last_metric_upload_time = Instant::now();
78
'poll: loop {
79
let events = match event_ctx.wait_timeout(WORKER_REPORT_INTERVAL) {
80
Ok(events) => events,
81
Err(e) => {
82
error!("failed polling for events: {}", e);
83
return;
84
}
85
};
86
if events.is_empty() {
87
self.collect_metrics();
88
// Time budget for UI thread is very limited.
89
// We make the metric string for displaying in UI in the
90
// worker thread for best performance.
91
self.make_metrics_string();
92
93
self.upload_metrics(&mut last_metric_upload_time);
94
}
95
96
if events.into_iter().any(|e| e.is_readable) {
97
break 'poll;
98
}
99
}
100
}
101
102
fn make_metrics_string(&mut self) {
103
let mut metrics_string = self.metrics_string.lock().unwrap();
104
*metrics_string = format!(
105
"{}\n{}",
106
self.cpu_metrics_string(),
107
self.mem_metrics_string()
108
);
109
}
110
111
fn upload_metrics(&self, last_metric_upload_time: &mut Instant) {
112
if last_metric_upload_time.elapsed() >= METRICS_UPLOAD_INTERVAL {
113
let mut memory_acc = self.memory_acc.lock().unwrap();
114
if let Some(acc) = &*memory_acc {
115
let mem = acc.accumulated.physical / acc.accumulated_count / BYTES_PER_MB;
116
// The i64 cast will not cause overflow because the mem is at most 10TB for
117
// Windows 10.
118
log_metric(MetricEventType::MemoryUsage, mem as i64);
119
}
120
*memory_acc = None;
121
122
let mut cpu_measurements = self.measurements.lock().unwrap();
123
if let Some(measurements) = &*cpu_measurements {
124
let sys_time = measurements.current.sys_time;
125
let process_time = measurements.current.process_time;
126
let prev_sys_time = measurements.last_upload.sys_time;
127
let prev_process_time = measurements.last_upload.process_time;
128
129
let diff_systime_kernel =
130
compute_filetime_subtraction(sys_time.kernel, prev_sys_time.kernel);
131
let diff_systime_user =
132
compute_filetime_subtraction(sys_time.user, prev_sys_time.user);
133
134
let diff_processtime_kernel =
135
compute_filetime_subtraction(process_time.kernel, prev_process_time.kernel);
136
let diff_processtime_user =
137
compute_filetime_subtraction(process_time.user, prev_process_time.user);
138
139
let total_systime = diff_systime_kernel + diff_systime_user;
140
let total_processtime = diff_processtime_kernel + diff_processtime_user;
141
142
if total_systime > 0 {
143
let cpu_usage = 100 * total_processtime / total_systime;
144
// The i64 cast will not cause overflow because the usage is at most 100.
145
log_metric(MetricEventType::CpuUsage, cpu_usage);
146
}
147
}
148
*cpu_measurements = None;
149
150
let mut io = self.io.lock().unwrap();
151
if let Some(io_record) = &*io {
152
let new_io_read_bytes =
153
io_record.current.read_bytes - io_record.last_upload.read_bytes;
154
let new_io_write_bytes =
155
io_record.current.write_bytes - io_record.last_upload.write_bytes;
156
157
let ms_elapsed =
158
(io_record.current_time - io_record.last_upload_time).num_milliseconds();
159
if ms_elapsed > 0 {
160
let io_read_bytes_per_sec =
161
(new_io_read_bytes as f32) / (ms_elapsed as f32) * 1000.0;
162
let io_write_bytes_per_sec =
163
(new_io_write_bytes as f32) / (ms_elapsed as f32) * 1000.0;
164
log_metric(MetricEventType::ReadIo, io_read_bytes_per_sec as i64);
165
log_metric(MetricEventType::WriteIo, io_write_bytes_per_sec as i64);
166
}
167
}
168
*io = None;
169
*last_metric_upload_time = Instant::now();
170
}
171
}
172
173
fn collect_metrics(&mut self) {
174
match self.get_cpu_metrics() {
175
Ok(new_measurement) => {
176
let mut measurements = self.measurements.lock().unwrap();
177
let next_measurements = match *measurements {
178
Some(Measurements {
179
current,
180
last_upload,
181
..
182
}) => Measurements {
183
current: new_measurement,
184
previous: current,
185
last_upload,
186
},
187
None => Measurements {
188
current: new_measurement,
189
previous: new_measurement,
190
last_upload: new_measurement,
191
},
192
};
193
*measurements = Some(next_measurements);
194
}
195
Err(e) => {
196
// Do not panic because of cpu query related failures.
197
error!("Get cpu measurement failed: {}", e);
198
}
199
}
200
201
match self.get_mem_metrics() {
202
Ok(mem) => {
203
// Keep running sum and count to calculate averages.
204
let mut memory_acc = self.memory_acc.lock().unwrap();
205
let updated_memory_acc = match *memory_acc {
206
Some(acc) => accumulate_process_memory(acc, mem),
207
None => ProcessMemoryAccumulated {
208
accumulated: mem,
209
accumulated_count: 1,
210
},
211
};
212
*memory_acc = Some(updated_memory_acc);
213
*self.memory.lock().unwrap() = mem
214
}
215
Err(e) => {
216
// Do not panic because of memory query failures.
217
error!("Get cpu measurement failed: {}", e);
218
}
219
}
220
221
match self.get_io_metrics() {
222
Ok(new_io) => {
223
let mut io_record = self.io.lock().unwrap();
224
let updated_io = match *io_record {
225
Some(io) => ProcessIoRecord {
226
current: new_io,
227
current_time: Utc::now(),
228
last_upload: io.last_upload,
229
last_upload_time: io.last_upload_time,
230
},
231
None => ProcessIoRecord {
232
current: new_io,
233
current_time: Utc::now(),
234
last_upload: new_io,
235
last_upload_time: Utc::now(),
236
},
237
};
238
*io_record = Some(updated_io);
239
}
240
Err(e) => {
241
// Do not panic because of io query failures.
242
error!("Get io measurement failed: {}", e);
243
}
244
}
245
}
246
247
fn get_mem_metrics(&self) -> SysResult<ProcessMemory> {
248
let process_handle = CoreWinMetrics::get_process_handle()?;
249
250
let mut counters = PROCESS_MEMORY_COUNTERS_EX::default();
251
252
// SAFETY:
253
// Safe because we own the process handle and all memory was allocated.
254
let result = unsafe {
255
GetProcessMemoryInfo(
256
process_handle.as_raw_descriptor(),
257
// Converting is necessary because the `winapi`' GetProcessMemoryInfo
258
// does NOT support `PROCESS_MEMORY_COUNTERS_EX`, but only
259
// 'PROCESS_MEMORY_COUNTERS'. The casting is safe because the underlining
260
// Windows api does `PROCESS_MEMORY_COUNTERS_EX`.
261
&mut counters as *mut PROCESS_MEMORY_COUNTERS_EX as *mut PROCESS_MEMORY_COUNTERS,
262
mem::size_of::<PROCESS_MEMORY_COUNTERS_EX>() as DWORD,
263
)
264
};
265
if result == 0 {
266
return Err(SysError::last());
267
}
268
269
Ok(ProcessMemory {
270
page_fault_count: counters.PageFaultCount,
271
working_set_size: counters.WorkingSetSize,
272
working_set_peak: counters.PeakWorkingSetSize,
273
page_file_usage: counters.PagefileUsage,
274
page_file_peak: counters.PeakPagefileUsage,
275
physical: counters.PrivateUsage,
276
})
277
}
278
279
fn get_cpu_metrics(&self) -> SysResult<Measurement> {
280
let mut sys_time: SystemCpuTime = Default::default();
281
let mut process_time: ProcessCpuTime = Default::default();
282
let sys_time_success: i32;
283
284
// SAFETY:
285
// Safe because memory is allocated for sys_time before the windows call.
286
// And the value were initilized to 0s.
287
unsafe {
288
// First get kernel cpu time.
289
sys_time_success =
290
GetSystemTimes(&mut sys_time.idle, &mut sys_time.kernel, &mut sys_time.user);
291
}
292
if sys_time_success == 0 {
293
error!("Systime collection failed.\n");
294
return Err(SysError::last());
295
} else {
296
// Query current process cpu time.
297
let process_handle = CoreWinMetrics::get_process_handle()?;
298
let process_time_success: i32;
299
// SAFETY:
300
// Safe because memory is allocated for process_time before the windows call.
301
// And the value were initilized to 0s.
302
unsafe {
303
process_time_success = GetProcessTimes(
304
process_handle.as_raw_descriptor(),
305
&mut process_time.create,
306
&mut process_time.exit,
307
&mut process_time.kernel,
308
&mut process_time.user,
309
);
310
}
311
if process_time_success == 0 {
312
error!("Systime collection failed.\n");
313
return Err(SysError::last());
314
}
315
}
316
Ok(Measurement {
317
sys_time: SystemCpuTime {
318
idle: sys_time.idle,
319
kernel: sys_time.kernel,
320
user: sys_time.user,
321
},
322
process_time: ProcessCpuTime {
323
create: process_time.create,
324
exit: process_time.exit,
325
kernel: process_time.kernel,
326
user: process_time.user,
327
},
328
})
329
}
330
331
fn get_io_metrics(&self) -> SysResult<ProcessIo> {
332
let process_handle = CoreWinMetrics::get_process_handle()?;
333
let mut io_counters = IO_COUNTERS::default();
334
// SAFETY:
335
// Safe because we own the process handle and all memory was allocated.
336
let result = unsafe {
337
GetProcessIoCounters(
338
process_handle.as_raw_descriptor(),
339
&mut io_counters as *mut IO_COUNTERS,
340
)
341
};
342
if result == 0 {
343
return Err(SysError::last());
344
}
345
Ok(ProcessIo {
346
read_bytes: io_counters.ReadTransferCount,
347
write_bytes: io_counters.WriteTransferCount,
348
})
349
}
350
351
fn mem_metrics_string(&self) -> String {
352
let guard = self.memory.lock().unwrap();
353
let memory: ProcessMemory = *guard;
354
let mut buf = format!(
355
"Physical memory used: {} mb.\n",
356
memory.physical / BYTES_PER_MB
357
);
358
let _ = writeln!(
359
buf,
360
"Total working memory: {} mb.",
361
memory.working_set_size / BYTES_PER_MB
362
);
363
let _ = writeln!(
364
buf,
365
"Peak working memory: {} mb.",
366
memory.working_set_peak / BYTES_PER_MB
367
);
368
let _ = writeln!(buf, "Page fault count: {}.", memory.page_fault_count);
369
let _ = writeln!(
370
buf,
371
"Page file used: {} mb.",
372
memory.page_file_usage / BYTES_PER_MB
373
);
374
let _ = writeln!(
375
buf,
376
"Peak page file used: {} mb.",
377
memory.page_file_peak / BYTES_PER_MB
378
);
379
buf
380
}
381
382
fn cpu_metrics_string(&self) -> String {
383
let guard = self.measurements.lock().unwrap();
384
let mut buf = String::new();
385
386
// Now we use current and last cpu measurment data to calculate cpu usage
387
// as a percentage.
388
if let Some(measurements) = &*guard {
389
let sys_time = measurements.current.sys_time;
390
let process_time = measurements.current.process_time;
391
let prev_sys_time = measurements.previous.sys_time;
392
let prev_process_time = measurements.previous.process_time;
393
394
let diff_systime_kernel =
395
compute_filetime_subtraction(sys_time.kernel, prev_sys_time.kernel);
396
let diff_systime_user = compute_filetime_subtraction(sys_time.user, prev_sys_time.user);
397
398
let diff_processtime_kernel =
399
compute_filetime_subtraction(process_time.kernel, prev_process_time.kernel);
400
let diff_processtime_user =
401
compute_filetime_subtraction(process_time.user, prev_process_time.user);
402
403
let total_systime = diff_systime_kernel + diff_systime_user;
404
let total_processtime = diff_processtime_kernel + diff_processtime_user;
405
406
let mut process_cpu = String::from("still calculating...");
407
if total_systime > 0 {
408
process_cpu = format!("{}%", (100 * total_processtime / total_systime));
409
}
410
let _ = writeln!(buf, "Process cpu usage is: {process_cpu}");
411
412
#[cfg(debug_assertions)]
413
{
414
// Show data supporting our cpu usage calculation.
415
// Output system cpu time.
416
let _ = writeln!(
417
buf,
418
"Systime Idle: low {} / high {}",
419
sys_time.idle.dwLowDateTime, sys_time.idle.dwHighDateTime
420
);
421
let _ = writeln!(
422
buf,
423
"Systime User: low {} / high {}",
424
sys_time.user.dwLowDateTime, sys_time.user.dwHighDateTime
425
);
426
let _ = writeln!(
427
buf,
428
"Systime kernel: low {} / high {}",
429
sys_time.kernel.dwLowDateTime, sys_time.kernel.dwHighDateTime
430
);
431
// Output process cpu time.
432
let _ = writeln!(
433
buf,
434
"Process Create: low {} / high {}",
435
process_time.create.dwLowDateTime, process_time.create.dwHighDateTime
436
);
437
let _ = writeln!(
438
buf,
439
"Process Exit: low {} / high {}",
440
process_time.exit.dwLowDateTime, process_time.exit.dwHighDateTime
441
);
442
let _ = writeln!(
443
buf,
444
"Process kernel: low {} / high {}",
445
process_time.kernel.dwLowDateTime, process_time.kernel.dwHighDateTime
446
);
447
let _ = writeln!(
448
buf,
449
"Process user: low {} / high {}",
450
process_time.user.dwLowDateTime, process_time.user.dwHighDateTime
451
);
452
}
453
} else {
454
let _ = write!(buf, "Calculating cpu usage...");
455
}
456
buf
457
}
458
}
459
460
fn compute_filetime_subtraction(fta: FILETIME, ftb: FILETIME) -> LONGLONG {
461
// SAFETY:
462
// safe because we are initializing the struct to 0s.
463
unsafe {
464
let mut a: LARGE_INTEGER = mem::zeroed::<LARGE_INTEGER>();
465
a.u_mut().LowPart = fta.dwLowDateTime;
466
a.u_mut().HighPart = fta.dwHighDateTime as i32;
467
let mut b: LARGE_INTEGER = mem::zeroed::<LARGE_INTEGER>();
468
b.u_mut().LowPart = ftb.dwLowDateTime;
469
b.u_mut().HighPart = ftb.dwHighDateTime as i32;
470
a.QuadPart() - b.QuadPart()
471
}
472
}
473
474
// Adds to a running total of memory metrics over the course of a collection period.
475
// Can divide these sums to calculate averages.
476
fn accumulate_process_memory(
477
acc: ProcessMemoryAccumulated,
478
mem: ProcessMemory,
479
) -> ProcessMemoryAccumulated {
480
ProcessMemoryAccumulated {
481
accumulated: ProcessMemory {
482
page_fault_count: mem.page_fault_count,
483
working_set_size: acc.accumulated.working_set_size + mem.working_set_size,
484
working_set_peak: mem.working_set_peak,
485
page_file_usage: acc.accumulated.page_file_usage + mem.page_file_usage,
486
page_file_peak: mem.page_file_peak,
487
physical: acc.accumulated.physical + mem.physical,
488
},
489
accumulated_count: acc.accumulated_count + 1,
490
}
491
}
492
493
#[derive(Copy, Clone, Default)]
494
struct SystemCpuTime {
495
idle: FILETIME,
496
kernel: FILETIME,
497
user: FILETIME,
498
}
499
500
#[derive(Copy, Clone, Default)]
501
struct ProcessCpuTime {
502
create: FILETIME,
503
exit: FILETIME,
504
kernel: FILETIME,
505
user: FILETIME,
506
}
507
508
#[derive(Copy, Clone, Default)]
509
struct ProcessMemory {
510
page_fault_count: u32,
511
working_set_size: usize,
512
working_set_peak: usize,
513
page_file_usage: usize,
514
page_file_peak: usize,
515
physical: usize,
516
}
517
518
#[derive(Copy, Clone)]
519
struct ProcessMemoryAccumulated {
520
accumulated: ProcessMemory,
521
accumulated_count: usize,
522
}
523
524
#[derive(Copy, Clone, Default)]
525
struct ProcessIo {
526
read_bytes: u64,
527
write_bytes: u64,
528
}
529
530
#[derive(Copy, Clone)]
531
struct ProcessIoRecord {
532
current: ProcessIo,
533
current_time: DateTime<Utc>,
534
last_upload: ProcessIo,
535
last_upload_time: DateTime<Utc>,
536
}
537
538
#[derive(Copy, Clone)]
539
struct Measurement {
540
sys_time: SystemCpuTime,
541
process_time: ProcessCpuTime,
542
}
543
544
struct Measurements {
545
current: Measurement,
546
previous: Measurement,
547
last_upload: Measurement,
548
}
549
550
/// A managing struct for a job which defines regular logging of core Windows system metrics.
551
pub(crate) struct CoreWinMetrics {
552
metrics_string: Weak<Mutex<String>>,
553
exit_evt: Event,
554
worker_thread: Option<JoinHandle<()>>,
555
}
556
557
impl CoreWinMetrics {
558
pub fn new() -> Result<Self> {
559
let exit_evt = match Event::new() {
560
Ok(evt) => evt,
561
Err(_e) => return Err(Error::CannotInstantiateEvent),
562
};
563
564
let metrics_string = String::new();
565
let arc_metrics_memory = Arc::new(Mutex::new(metrics_string));
566
let weak_metrics_memory = Arc::downgrade(&arc_metrics_memory);
567
568
let mut me = Self {
569
metrics_string: weak_metrics_memory,
570
exit_evt,
571
worker_thread: None,
572
};
573
let exit_evt_clone = match me.exit_evt.try_clone() {
574
Ok(evt) => evt,
575
Err(_) => return Err(Error::CannotCloneEvent),
576
};
577
me.worker_thread.replace(thread::spawn(|| {
578
Worker {
579
exit_evt: exit_evt_clone,
580
io: Arc::new(Mutex::new(None)),
581
measurements: Arc::new(Mutex::new(None)),
582
memory: Arc::new(Mutex::new(Default::default())),
583
memory_acc: Arc::new(Mutex::new(None)),
584
metrics_string: arc_metrics_memory,
585
}
586
.run();
587
}));
588
Ok(me)
589
}
590
591
fn get_process_handle() -> SysResult<SafeDescriptor> {
592
// SAFETY:
593
// Safe because we own the current process.
594
let process_handle = unsafe {
595
OpenProcess(
596
PROCESS_QUERY_LIMITED_INFORMATION | PROCESS_VM_READ | SYNCHRONIZE,
597
0,
598
std::process::id(),
599
)
600
};
601
if process_handle.is_null() {
602
return Err(SysError::last());
603
}
604
// SAFETY:
605
// Safe as the SafeDescriptor is the only thing with access to the handle after this.
606
Ok(unsafe { SafeDescriptor::from_raw_descriptor(process_handle) })
607
}
608
}
609
610
impl Drop for CoreWinMetrics {
611
fn drop(&mut self) {
612
if let Some(join_handle) = self.worker_thread.take() {
613
let _ = self.exit_evt.signal();
614
join_handle
615
.join()
616
.expect("fail to join the worker thread of a win core metrics collector.");
617
}
618
}
619
}
620
621
impl fmt::Display for CoreWinMetrics {
622
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
623
match self.metrics_string.upgrade() {
624
Some(metrics_string) => write!(f, "{}", *metrics_string.lock().unwrap()),
625
None => write!(f, ""),
626
}
627
}
628
}
629
630