Metrics for the SCONE Service Operator
Retrieve the metrics from the metrics service endpoint
You can retrieve the metrics published by the scone-service-operator with the following commands.
Create a port-forward to the metrics service endpoint (k is assumed to be a shell alias for kubectl; substitute kubectl if you do not have this alias).
export SCONE_SERVICE_OPERATOR_NAMESPACE=osc-sec-scone-svc-operator
k port-forward -n $SCONE_SERVICE_OPERATOR_NAMESPACE service/scone-service-operator-controller-metrics 8080:8080
While the port-forward is still running, open another terminal and run the following command to retrieve the metrics:
curl http://localhost:8080/metrics
SCONE Service Operator Metrics
# HELP certwatcher_read_certificate_errors_total Total number of certificate read errors # TYPE certwatcher_read_certificate_errors_total counter certwatcher_read_certificate_errors_total 0 # HELP certwatcher_read_certificate_total Total number of certificate reads # TYPE certwatcher_read_certificate_total counter certwatcher_read_certificate_total 1 # HELP controller_runtime_active_workers Number of currently used workers per controller # TYPE controller_runtime_active_workers gauge controller_runtime_active_workers{controller="scone"} 0 # HELP controller_runtime_max_concurrent_reconciles Maximum number of concurrent reconciles per controller # TYPE controller_runtime_max_concurrent_reconciles gauge controller_runtime_max_concurrent_reconciles{controller="scone"} 1 # HELP controller_runtime_reconcile_errors_total Total number of reconciliation errors per controller # TYPE controller_runtime_reconcile_errors_total counter controller_runtime_reconcile_errors_total{controller="scone"} 0 # HELP controller_runtime_reconcile_total Total number of reconciliations per controller # TYPE controller_runtime_reconcile_total counter controller_runtime_reconcile_total{controller="scone",result="error"} 0 controller_runtime_reconcile_total{controller="scone",result="requeue"} 0 controller_runtime_reconcile_total{controller="scone",result="requeue_after"} 0 controller_runtime_reconcile_total{controller="scone",result="success"} 0 # HELP controller_runtime_webhook_requests_in_flight Current number of admission requests being served. 
# TYPE controller_runtime_webhook_requests_in_flight gauge controller_runtime_webhook_requests_in_flight{webhook="/mutate-services-scone-cloud-v1beta1-cas"} 0 controller_runtime_webhook_requests_in_flight{webhook="/validate-confidential-security-osc-t-systems-com-v1alpha1-scone"} 0 controller_runtime_webhook_requests_in_flight{webhook="/validate-services-scone-cloud-v1beta1-cas"} 0 # HELP controller_runtime_webhook_requests_total Total number of admission requests by HTTP status code. # TYPE controller_runtime_webhook_requests_total counter controller_runtime_webhook_requests_total{code="200",webhook="/mutate-services-scone-cloud-v1beta1-cas"} 0 controller_runtime_webhook_requests_total{code="200",webhook="/validate-confidential-security-osc-t-systems-com-v1alpha1-scone"} 0 controller_runtime_webhook_requests_total{code="200",webhook="/validate-services-scone-cloud-v1beta1-cas"} 0 controller_runtime_webhook_requests_total{code="500",webhook="/mutate-services-scone-cloud-v1beta1-cas"} 0 controller_runtime_webhook_requests_total{code="500",webhook="/validate-confidential-security-osc-t-systems-com-v1alpha1-scone"} 0 controller_runtime_webhook_requests_total{code="500",webhook="/validate-services-scone-cloud-v1beta1-cas"} 0 # HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles. # TYPE go_gc_duration_seconds summary go_gc_duration_seconds{quantile="0"} 3.3998e-05 go_gc_duration_seconds{quantile="0.25"} 3.9339e-05 go_gc_duration_seconds{quantile="0.5"} 4.4502e-05 go_gc_duration_seconds{quantile="0.75"} 7.5572e-05 go_gc_duration_seconds{quantile="1"} 0.000345838 go_gc_duration_seconds_sum 0.010850869 go_gc_duration_seconds_count 167 # HELP go_goroutines Number of goroutines that currently exist. # TYPE go_goroutines gauge go_goroutines 47 # HELP go_info Information about the Go environment. # TYPE go_info gauge go_info{version="go1.22.4"} 1 # HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. 
# TYPE go_memstats_alloc_bytes gauge go_memstats_alloc_bytes 7.917472e+06 # HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. # TYPE go_memstats_alloc_bytes_total counter go_memstats_alloc_bytes_total 5.8358292e+08 # HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. # TYPE go_memstats_buck_hash_sys_bytes gauge go_memstats_buck_hash_sys_bytes 1.536884e+06 # HELP go_memstats_frees_total Total number of frees. # TYPE go_memstats_frees_total counter go_memstats_frees_total 3.941242e+06 # HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. # TYPE go_memstats_gc_sys_bytes gauge go_memstats_gc_sys_bytes 3.463544e+06 # HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. # TYPE go_memstats_heap_alloc_bytes gauge go_memstats_heap_alloc_bytes 7.917472e+06 # HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. # TYPE go_memstats_heap_idle_bytes gauge go_memstats_heap_idle_bytes 8.290304e+06 # HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. # TYPE go_memstats_heap_inuse_bytes gauge go_memstats_heap_inuse_bytes 1.1288576e+07 # HELP go_memstats_heap_objects Number of allocated objects. # TYPE go_memstats_heap_objects gauge go_memstats_heap_objects 25021 # HELP go_memstats_heap_released_bytes Number of heap bytes released to OS. # TYPE go_memstats_heap_released_bytes gauge go_memstats_heap_released_bytes 7.462912e+06 # HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. # TYPE go_memstats_heap_sys_bytes gauge go_memstats_heap_sys_bytes 1.957888e+07 # HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. # TYPE go_memstats_last_gc_time_seconds gauge go_memstats_last_gc_time_seconds 1.7260601083890316e+09 # HELP go_memstats_lookups_total Total number of pointer lookups. 
# TYPE go_memstats_lookups_total counter go_memstats_lookups_total 0 # HELP go_memstats_mallocs_total Total number of mallocs. # TYPE go_memstats_mallocs_total counter go_memstats_mallocs_total 3.966263e+06 # HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. # TYPE go_memstats_mcache_inuse_bytes gauge go_memstats_mcache_inuse_bytes 4800 # HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. # TYPE go_memstats_mcache_sys_bytes gauge go_memstats_mcache_sys_bytes 15600 # HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. # TYPE go_memstats_mspan_inuse_bytes gauge go_memstats_mspan_inuse_bytes 167520 # HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. # TYPE go_memstats_mspan_sys_bytes gauge go_memstats_mspan_sys_bytes 195840 # HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. # TYPE go_memstats_next_gc_bytes gauge go_memstats_next_gc_bytes 1.3738968e+07 # HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. # TYPE go_memstats_other_sys_bytes gauge go_memstats_other_sys_bytes 953900 # HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. # TYPE go_memstats_stack_inuse_bytes gauge go_memstats_stack_inuse_bytes 1.343488e+06 # HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. # TYPE go_memstats_stack_sys_bytes gauge go_memstats_stack_sys_bytes 1.343488e+06 # HELP go_memstats_sys_bytes Number of bytes obtained from system. # TYPE go_memstats_sys_bytes gauge go_memstats_sys_bytes 2.7088136e+07 # HELP go_threads Number of OS threads created. # TYPE go_threads gauge go_threads 10 # HELP leader_election_master_status Gauge of if the reporting system is master of the relevant lease, 0 indicates backup, 1 indicates master. 'name' is the string used to identify the lease. 
Please make sure to group by name. # TYPE leader_election_master_status gauge leader_election_master_status{name="scone-service-operator.confidential.security.osc.t-systems.com"} 1 # HELP osc_sec_scone_billing_cpu_count The total number of CPUs relevant for billing # TYPE osc_sec_scone_billing_cpu_count gauge osc_sec_scone_billing_cpu_count{name="Billing LAS CPU count"} 0 # HELP osc_sws_disabled_safeguards Indicates whether the CR has disabled safeguards # TYPE osc_sws_disabled_safeguards gauge osc_sws_disabled_safeguards{cr_name="scone",cr_namespace="",cr_type="Scone",safeguard_name="unmanaged"} 0 # HELP osc_sws_operator_safe_to_delete Indicates whether the operator is safe to delete # TYPE osc_sws_operator_safe_to_delete gauge osc_sws_operator_safe_to_delete{operator_name="osc-sec-scone-controller"} 1 # HELP osc_sws_total_cr_count The total number of CRs of the given type # TYPE osc_sws_total_cr_count gauge osc_sws_total_cr_count{cr_type="Scone",operator_name="osc-sec-scone-controller"} 0 # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. # TYPE process_cpu_seconds_total counter process_cpu_seconds_total 17.39 # HELP process_max_fds Maximum number of open file descriptors. # TYPE process_max_fds gauge process_max_fds 1.048576e+06 # HELP process_open_fds Number of open file descriptors. # TYPE process_open_fds gauge process_open_fds 14 # HELP process_resident_memory_bytes Resident memory size in bytes. # TYPE process_resident_memory_bytes gauge process_resident_memory_bytes 5.474304e+07 # HELP process_start_time_seconds Start time of the process since unix epoch in seconds. # TYPE process_start_time_seconds gauge process_start_time_seconds 1.72604054825e+09 # HELP process_virtual_memory_bytes Virtual memory size in bytes. # TYPE process_virtual_memory_bytes gauge process_virtual_memory_bytes 5.605801984e+09 # HELP process_virtual_memory_max_bytes Maximum amount of virtual memory available in bytes. 
# TYPE process_virtual_memory_max_bytes gauge process_virtual_memory_max_bytes 1.8446744073709552e+19 # HELP rest_client_requests_total Number of HTTP requests, partitioned by status code, method, and host. # TYPE rest_client_requests_total counter rest_client_requests_total{code="200",host="api.vk01.sec.internal.fgd1.ffm-dev.osc:443",method="GET"} 54 rest_client_requests_total{code="200",host="api.vk01.sec.internal.fgd1.ffm-dev.osc:443",method="PUT"} 9770 rest_client_requests_total{code="201",host="api.vk01.sec.internal.fgd1.ffm-dev.osc:443",method="POST"} 1 rest_client_requests_total{code="404",host="api.vk01.sec.internal.fgd1.ffm-dev.osc:443",method="GET"} 326 # HELP workqueue_adds_total Total number of adds handled by workqueue # TYPE workqueue_adds_total counter workqueue_adds_total{name="scone"} 0 # HELP workqueue_depth Current depth of workqueue # TYPE workqueue_depth gauge workqueue_depth{name="scone"} 0 # HELP workqueue_longest_running_processor_seconds How many seconds has the longest running processor for workqueue been running. 
# TYPE workqueue_longest_running_processor_seconds gauge workqueue_longest_running_processor_seconds{name="scone"} 0 # HELP workqueue_queue_duration_seconds How long in seconds an item stays in workqueue before being requested # TYPE workqueue_queue_duration_seconds histogram workqueue_queue_duration_seconds_bucket{name="scone",le="1e-08"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="1e-07"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="1e-06"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="9.999999999999999e-06"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="9.999999999999999e-05"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="0.001"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="0.01"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="0.1"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="1"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="10"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="100"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="1000"} 0 workqueue_queue_duration_seconds_bucket{name="scone",le="+Inf"} 0 workqueue_queue_duration_seconds_sum{name="scone"} 0 workqueue_queue_duration_seconds_count{name="scone"} 0 # HELP workqueue_retries_total Total number of retries handled by workqueue # TYPE workqueue_retries_total counter workqueue_retries_total{name="scone"} 0 # HELP workqueue_unfinished_work_seconds How many seconds of work has been done that is in progress and hasn't been observed by work_duration. Large values indicate stuck threads. One can deduce the number of stuck threads by observing the rate at which this increases. # TYPE workqueue_unfinished_work_seconds gauge workqueue_unfinished_work_seconds{name="scone"} 0 # HELP workqueue_work_duration_seconds How long in seconds processing an item from workqueue takes. 
# TYPE workqueue_work_duration_seconds histogram workqueue_work_duration_seconds_bucket{name="scone",le="1e-08"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="1e-07"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="1e-06"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="9.999999999999999e-06"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="9.999999999999999e-05"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="0.001"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="0.01"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="0.1"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="1"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="10"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="100"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="1000"} 0 workqueue_work_duration_seconds_bucket{name="scone",le="+Inf"} 0 workqueue_work_duration_seconds_sum{name="scone"} 0 workqueue_work_duration_seconds_count{name="scone"} 0