FFmpeg
perf.c
Go to the documentation of this file.
1 /*
2  * Copyright © 2025, Niklas Haas
3  * Copyright © 2018, VideoLAN and dav1d authors
4  * Copyright © 2018, Two Orioles, LLC
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "checkasm_config.h"
30 
31 #include <limits.h>
32 #include <stdio.h>
33 
34 #include "checkasm/perf.h"
35 #include "checkasm/test.h"
36 #include "internal.h"
37 #include "perf_internal.h"
38 #include "stats.h"
39 
40 #ifdef CHECKASM_PERF_ASM
/* Begin a measurement: sample CHECKASM_PERF_ASM() once and return that
 * reading so it can later be handed to perf_stop_asm(). */
static uint64_t perf_start_asm(void)
{
    const uint64_t now = CHECKASM_PERF_ASM();
    return now;
}
45 
/* Finish a measurement: sample CHECKASM_PERF_ASM() again and return the
 * difference from the start reading `t` (unsigned arithmetic, so the
 * subtraction wraps rather than overflowing). */
static uint64_t perf_stop_asm(uint64_t t)
{
    const uint64_t now = CHECKASM_PERF_ASM();
    return now - t;
}
50 #endif
51 
53 
55 {
56  return &checkasm_perf;
57 }
58 
60 {
61  /* checkasm_gettime_nsec() is needed to validate asm timers */
62  if (checkasm_gettime_nsec() == (uint64_t) -1) {
63  fprintf(stderr, "checkasm: timers are not available on this system\n");
64  return 1;
65  }
66 
67 #if defined(CHECKASM_PERF_ASM) && CHECKASM_HAVE_LONGJMP
69  /* Try calling the asm timer to see if it works */
73 
74  checkasm_perf.start = perf_start_asm;
75  checkasm_perf.stop = perf_stop_asm;
78  checkasm_perf.asm_usable = 1;
79  } else {
80  fprintf(stderr, "checkasm: unable to access %s cycle counter\n",
82  checkasm_perf.asm_usable = 0;
83  }
84 
85  #ifdef CHECKASM_PERF_ASM_INIT
86  /* Try starting the timers, if possible */
89  CHECKASM_PERF_ASM_INIT();
91 
92  /* If starting the timers seems to work, run that on all cores. */
93  checkasm_run_on_all_cores(CHECKASM_PERF_ASM_INIT);
94  }
95  #endif
96 
97  /* If we got an asm timer, validate that it works. */
98  if (checkasm_perf.asm_usable) {
100  return 0;
101  checkasm_perf.asm_usable = 0;
102  }
103 #endif
104 
105 #if HAVE_LINUX_PERF
107  return 0;
108 #endif
109 
110 #if HAVE_MACOS_KPERF
112  return 0;
113 #endif
114 
115 #if ARCH_ARM || ARCH_AARCH64
117  return 0;
118 #endif
119 
120  /* Generic fallback to gettime() if supported */
123  checkasm_perf.name = "gettime";
124  checkasm_perf.unit = "nsec";
125  return 0;
126 }
127 
129 {
130  /* Try to make the loop long enough to be sure that the timer should
131  * increment, if it is functional. */
132  const uint64_t target_nsec = 20000; /* 20 us */
133  const uint64_t start_cycles = perf->start();
134  const uint64_t start_nsec = checkasm_gettime_nsec();
135 
136  /* Only loop as long as we get the initial timer value; we exit the loop
137  * as soon as we see the timer return a different value.
138  * This works for a timer where we can just call the ->start() function
139  * repeatedly, getting new timer values. */
140  while (perf->start() == start_cycles) {
141  if (checkasm_gettime_nsec_diff(start_nsec) > target_nsec) {
142  fprintf(stderr, "checkasm: %s timer doesn't increment\n", perf->name);
143  return 1;
144  }
145  }
146 
147  return 0;
148 }
149 
151 {
152  /* Try to make the loop long enough to be sure that the timer should
153  * increment, if it is functional. */
154  const uint64_t target_nsec = 20000; /* 20 us */
155  const uint64_t start_nsec = checkasm_gettime_nsec();
156 
157  uint64_t cycles = perf->start();
158  /* For timers that require a pair of start/stop calls, run a busy loop
159  * until long enough has passed, that the timer should have incremented. */
160  while (checkasm_gettime_nsec_diff(start_nsec) <= target_nsec) {
161  for (int i = 0; i < 100; i++)
163  }
164  cycles = perf->stop(cycles);
165 
166  if (cycles == 0) {
167  /* The timer doesn't seem to increment at all. */
168  fprintf(stderr, "checkasm: %s timer doesn't increment\n", perf->name);
169  return 1;
170  }
171 
172  return 0;
173 }
174 
175 /* Measure the overhead of the timing code */
177 {
180  stats.next_count = 128; /* ensure we use ASM timers if available */
181 
182  void (*const bench_func)(void *) = checkasm_noop;
183  void *const ptr0 = (void *) 0x1000, *const ptr1 = (void *) 0x2000;
184 
185  const CheckasmPerf perf = checkasm_perf;
186  (void) perf;
187 
188  for (uint64_t total_cycles = 0; total_cycles < target_cycles;) {
189  int count = stats.next_count;
190  uint64_t cycles = 0;
191 
192  /* Spin up the CPU */
193  for (int i = 0; i < 100; i++)
195 
196  /* Measure the overhead of the timing code (in cycles) */
197  CHECKASM_PERF_BENCH(count, cycles, alternate(ptr0, ptr1));
198  total_cycles += cycles;
199 
202  if (stats.nb_samples == (int) ARRAY_SIZE(stats.samples))
203  break;
204  }
205 
207 }
208 
210 {
211  const CheckasmPerf perf = checkasm_perf;
212  if (!strcmp(perf.unit, "nsec")) {
213  *meas = (CheckasmMeasurement) {
214  .product = checkasm_var_const(1.0),
215  .nb_measurements = 1,
216  };
217  return;
218  }
219 
220  /* Try to make the loop long enough to be measurable, but not too long
221  * to avoid being affected by CPU frequency scaling or preemption */
222  const uint64_t target_nsec = 100000; /* 100 us */
223 
224  /* Estimate the time per loop iteration in two different ways */
227  stats.next_count = 100;
228 
229  while (stats.nb_samples < (int) ARRAY_SIZE(stats.samples)) {
230  const int iters = stats.next_count;
231 
232  /* Warm up the CPU a tiny bit */
233  for (int i = 0; i < 100; i++)
235 
236  uint64_t cycles;
237  cycles = perf.start();
238  for (int i = 0; i < iters; i++)
240  cycles = perf.stop(cycles);
241 
242  /* Measure the same loop with wallclock time instead of cycles */
243  uint64_t nsec = checkasm_gettime_nsec();
244  for (int i = 0; i < iters; i++)
246  nsec = checkasm_gettime_nsec_diff(nsec);
247 
248  assert(cycles <= INT_MAX);
249  checkasm_stats_add(&stats, (CheckasmSample) { nsec, (int) cycles });
250  checkasm_stats_count_grow(&stats, nsec, target_nsec);
251  if (nsec > target_nsec)
252  break;
253  }
254 
256 }
COLD
#define COLD
Definition: internal.h:45
CHECKASM_PERF_ASM_UNIT
#define CHECKASM_PERF_ASM_UNIT
Definition: aarch64.h:63
CheckasmStats::next_count
int next_count
Definition: stats.h:91
checkasm_config.h
CheckasmStats::samples
CheckasmSample samples[CHECKASM_STATS_SAMPLES]
Definition: stats.h:89
checkasm_context
checkasm_jmp_buf checkasm_context
Definition: signal.c:46
CheckasmPerf::start
uint64_t(* start)(void)
Start timing measurement.
Definition: test.h:533
checkasm_perf_init
COLD int checkasm_perf_init(void)
Definition: perf.c:59
checkasm_gettime_nsec
uint64_t checkasm_gettime_nsec(void)
Definition: utils.c:107
checkasm_save_context
#define checkasm_save_context(ctx)
Definition: longjmp.h:67
CheckasmPerf
Definition: test.h:527
checkasm_gettime_nsec_diff
uint64_t checkasm_gettime_nsec_diff(uint64_t t)
Definition: utils.c:112
checkasm_run_on_all_cores
int checkasm_run_on_all_cores(void(*func)(void))
Definition: checkasm.c:601
CheckasmPerf::stop
uint64_t(* stop)(uint64_t start_time)
Stop timing measurement.
Definition: test.h:540
CheckasmPerf::name
const char * name
Name of the timing mechanism (e.g., "clock_gettime")
Definition: test.h:543
target_cycles
uint64_t target_cycles
Definition: checkasm.c:117
checkasm_perf_init_linux
int checkasm_perf_init_linux(CheckasmPerf *perf)
checkasm_set_signal_handler_state
CHECKASM_API void checkasm_set_signal_handler_state(int enabled)
Enable or disable signal handling.
Definition: signal.c:52
checkasm_measure_perf_scale
COLD void checkasm_measure_perf_scale(CheckasmMeasurement *meas)
Definition: perf.c:209
perf_internal.h
checkasm_noop
void checkasm_noop(void *)
Definition: utils.c:63
checkasm_perf
CheckasmPerf checkasm_perf
Definition: perf.c:52
limits.h
checkasm_var_const
static CheckasmVar checkasm_var_const(double x)
Definition: stats.h:65
stats.h
NULL
#define NULL
Definition: coverity.c:32
cycles
uint64_t cycles
Definition: checkasm.c:94
CHECKASM_PERF_BENCH
#define CHECKASM_PERF_BENCH(count, time,...)
Definition: test.h:621
checkasm_perf_init_arm
int checkasm_perf_init_arm(CheckasmPerf *perf)
Definition: arm.c:148
ARRAY_SIZE
#define ARRAY_SIZE(a)
Definition: internal.h:81
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
CheckasmMeasurement::product
CheckasmVar product
Definition: stats.h:123
test.h
Test writing API for checkasm.
checkasm_measure_nop_cycles
COLD void checkasm_measure_nop_cycles(CheckasmMeasurement *meas, uint64_t target_cycles)
Definition: perf.c:176
stats
static CheckasmStats stats
Definition: checkasm.c:75
perf.h
CheckasmMeasurement
Definition: stats.h:122
checkasm_stats_count_grow
static void checkasm_stats_count_grow(CheckasmStats *const stats, uint64_t cycles, uint64_t target_cycles)
Definition: stats.h:108
CheckasmPerf::unit
const char * unit
Unit of measurement (e.g., "ns", "cycles")
Definition: test.h:546
CHECKASM_PERF_ASM_NAME
#define CHECKASM_PERF_ASM_NAME
Definition: aarch64.h:62
checkasm_get_perf
const CheckasmPerf * checkasm_get_perf(void)
Definition: perf.c:54
CheckasmSample
Definition: stats.h:80
checkasm_stats_reset
static void checkasm_stats_reset(CheckasmStats *const stats)
Definition: stats.h:94
internal.h
CheckasmStats::nb_samples
int nb_samples
Definition: stats.h:90
CheckasmStats
Definition: stats.h:85
checkasm_measurement_update
static void checkasm_measurement_update(CheckasmMeasurement *measurement, const CheckasmStats stats)
Definition: stats.h:135
checkasm_perf_validate_start_stop
COLD int checkasm_perf_validate_start_stop(const CheckasmPerf *perf)
Definition: perf.c:150
checkasm_perf_init_macos
int checkasm_perf_init_macos(CheckasmPerf *perf)
CHECKASM_PERF_ASM
#define CHECKASM_PERF_ASM()
Definition: aarch64.h:61
alternate
#define alternate
Definition: test.h:487
checkasm_perf_validate_start
COLD int checkasm_perf_validate_start(const CheckasmPerf *perf)
Definition: perf.c:128
checkasm_stats_add
static void checkasm_stats_add(CheckasmStats *const stats, const CheckasmSample s)
Definition: stats.h:100