/* Sample type; line sizes given in bytes are divided by sizeof(pixel). */
#define pixel uint16_t
/* Counter type of the coarse and fine histogram buffers. */
#define htype uint16_t

/*
 * Histogram geometry, derived from DEPTH (which must be defined before
 * these macros are used):
 *   SHIFT - number of low bits of a value kept by MASK: half of DEPTH,
 *           rounded up.
 *   BINS  - 1 << SHIFT.
 *   MASK  - selects the low SHIFT bits of a value.
 */
#define SHIFT   ((DEPTH + 1) / 2)
#define BINS    (1 << SHIFT)
#define MASK    (BINS - 1)

/*
 * Name mangling: fn(name) expands to name_<DEPTH>.
 * fn2 adds one level of expansion so that DEPTH is replaced by its value
 * before fn3 pastes the two tokens together with an underscore.
 */
#define fn3(base, suffix)   base##_##suffix
#define fn2(base, suffix)   fn3(base, suffix)
#define fn(base)            fn2(base, DEPTH)

/*
 * Index into a coarse histogram laid out as consecutive rows of BINS
 * counters: row `row`, bin chosen by the bits of `val` above SHIFT.
 */
#define PICK_COARSE_BIN(row, val) (BINS * (row) + ((val) >> SHIFT))

/*
 * Index into a fine histogram: the high bits of `val` pick a group of
 * `stride` rows, `pos` picks the row inside that group, and the low
 * SHIFT bits of `val` pick the counter inside the row.
 */
#define PICK_FINE_BIN(stride, val, pos) (BINS * ((stride) * ((val) >> SHIFT) + (pos)) + ((val) & MASK))
   54                              int slice_h_start, 
int slice_h_end, 
int jobnr)
 
   57     htype *ccoarse = 
s->coarse[jobnr];
 
   58     htype *cfine = 
s->fine[jobnr];
 
   59     const int radius = 
s->radius;
 
   60     const int radiusV = 
s->radiusV;
 
   67     src_linesize /= 
sizeof(
pixel);
 
   68     dst_linesize /= 
sizeof(
pixel);
 
   70     memset(cfine, 0, 
s->fine_size * 
sizeof(*cfine));
 
   71     memset(ccoarse, 0, 
s->coarse_size * 
sizeof(*ccoarse));
 
   73     srcp = 
src + 
FFMAX(0, slice_h_start - radiusV) * src_linesize;
 
   81     srcp = 
src + 
FFMAX(0, slice_h_start - radiusV - (jobnr != 0)) * src_linesize;
 
   82     for (
int i = 0; 
i < radiusV + (jobnr != 0) * (1 + radiusV); 
i++) {
 
   83         for (
int j = 0; j < 
width; j++) {
 
   92     for (
int i = slice_h_start; 
i < slice_h_end; 
i++) {
 
   97         p = srcp + src_linesize * 
FFMAX(0, 
i - radiusV - 1);
 
   98         for (
int j = 0; j < 
width; j++) {
 
  103         p = srcp + src_linesize * 
FFMIN(
height - 1, 
i + radiusV);
 
  104         for (
int j = 0; j < 
width; j++) {
 
  109         s->hmuladd(coarse, &ccoarse[0], radius, 
BINS);
 
  110         for (
int j = 0; j < radius; j++)
 
  111             s->hadd(coarse, &ccoarse[
BINS * j], 
BINS);
 
  112         for (
int k = 0; k < 
BINS; k++)
 
  113             s->hmuladd(&fine[k][0], &cfine[
BINS * 
width * k], 2 * radius + 1, 
BINS);
 
  115         for (
int j = 0; j < 
width; j++) {
 
  121             for (k = 0; k < 
BINS; k++) {
 
  130             if (luc[k] <= j - radius) {
 
  131                 memset(&fine[k], 0, 
BINS * 
sizeof(
htype));
 
  132                 for (luc[k] = j - radius; luc[k] < 
FFMIN(j + radius + 1, 
width); luc[k]++)
 
  134                 if (luc[k] < j + radius + 1) {
 
  136                     luc[k] = j + radius + 1;
 
  139                 for (; luc[k] < j + radius + 1; luc[k]++) {
 
  151                     dst[j] = 
BINS * k + 
b;