FFmpeg  4.4.4
nlmeans.c
Go to the documentation of this file.
1 // Generated from libavfilter/opencl/nlmeans.cl
3 "#line 1 \"libavfilter/opencl/nlmeans.cl\"\n"
4 "/*\n"
5 " * This file is part of FFmpeg.\n"
6 " *\n"
7 " * FFmpeg is free software; you can redistribute it and/or\n"
8 " * modify it under the terms of the GNU Lesser General Public\n"
9 " * License as published by the Free Software Foundation; either\n"
10 " * version 2.1 of the License, or (at your option) any later version.\n"
11 " *\n"
12 " * FFmpeg is distributed in the hope that it will be useful,\n"
13 " * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
14 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
15 " * Lesser General Public License for more details.\n"
16 " *\n"
17 " * You should have received a copy of the GNU Lesser General Public\n"
18 " * License along with FFmpeg; if not, write to the Free Software\n"
19 " * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n"
20 " */\n"
21 "\n"
// Image sampler shared by the read_imagef calls in the kernels below:
// unnormalized (pixel) coordinates, out-of-range coordinates clamped to the
// image edge, nearest-neighbour filtering (no interpolation).
22 "const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |\n"
23 " CLK_ADDRESS_CLAMP_TO_EDGE |\n"
24 " CLK_FILTER_NEAREST);\n"
25 "\n"
// horiz_sum: per-row running sum of squared pixel differences.
//
// The work-item takes its row index y from global id 0 and walks the columns
// i = 0 .. width-1. For each column it compares the first channel of the
// pixel at (i, y) with the first channel of four shifted pixels at
// (i + dx.k, y + dy.k), k = x, y, z, w, so four offsets are handled at once,
// one per vector lane.
//
// Parameters:
//   integral_img  output buffer, indexed as y * width + i; receives the
//                 running (prefix) sum along the row for all four offsets
//   src           input image; only its .x channel is read
//   width         number of columns to process and the row stride of
//                 integral_img
//   height        not referenced in the body
//   dx, dy        the four (dx.k, dy.k) displacement pairs
//
// NOTE(review): there is no y < height guard; this presumably relies on the
// host launching exactly one work-item per row - confirm against the caller.
26 "kernel void horiz_sum(__global uint4 *integral_img,\n"
27 " __read_only image2d_t src,\n"
28 " int width,\n"
29 " int height,\n"
30 " int4 dx,\n"
31 " int4 dy)\n"
32 "{\n"
33 "\n"
34 " int y = get_global_id(0);\n"
// work_size is assigned here but never read afterwards.
35 " int work_size = get_global_size(0);\n"
36 "\n"
37 " uint4 sum = (uint4)(0);\n"
// s2 is declared without an initializer; all four lanes are written inside
// the loop before the first read.
38 " float4 s2;\n"
39 " for (int i = 0; i < width; i++) {\n"
40 " float s1 = read_imagef(src, sampler, (int2)(i, y)).x;\n"
// Shifted coordinates may fall outside the image; the sampler is declared
// with CLK_ADDRESS_CLAMP_TO_EDGE, so such reads return edge pixels.
41 " s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x;\n"
42 " s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x;\n"
43 " s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x;\n"
44 " s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x;\n"
// Squared difference per offset, scaled by 255 * 255 and converted to
// unsigned integers before accumulation.
// NOTE(review): the 255 factors presumably rescale normalized [0, 1] samples
// to an 8-bit range - confirm the image format used by the host.
45 " sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255);\n"
// Store the running total, so element (y, i) holds the sum over columns 0..i.
46 " integral_img[y * width + i] = sum;\n"
47 " }\n"
48 "}\n"
49 "\n"
// vert_sum: in-place running sum down each column of integral_img.
//
// The work-item takes its column index x from global id 0 and walks rows
// i = 0 .. height-1, adding the accumulated value of the rows above to each
// element. Applied to row-wise prefix sums this yields a 2D integral image
// (presumably run after horiz_sum - confirm the host-side kernel order).
//
// Parameters:
//   integral_img  buffer updated in place, indexed as i * width + x
//   overflow      counter that is atomically incremented whenever an
//                 addition would exceed UINT_MAX in any of the four lanes
//   width         row stride of integral_img
//   height        number of rows to process
//
// NOTE(review): there is no x < width guard; presumably the host launches
// exactly one work-item per column - confirm against the caller.
50 "kernel void vert_sum(__global uint4 *integral_img,\n"
51 " __global int *overflow,\n"
52 " int width,\n"
53 " int height)\n"
54 "{\n"
55 " int x = get_global_id(0);\n"
56 " uint4 sum = 0;\n"
57 " for (int i = 0; i < height; i++) {\n"
// UINT_MAX - element < sum in any lane means element + sum does not fit in
// 32 bits. The overflow is only counted; the addition below is still
// performed.
58 " if (any((uint4)UINT_MAX - integral_img[i * width + x] < sum))\n"
59 " atomic_inc(overflow);\n"
60 " integral_img[i * width + x] += sum;\n"
// Carry the updated element forward as the running total for the next row.
61 " sum = integral_img[i * width + x];\n"
62 " }\n"
63 "}\n"
64 "\n"
// weight_accum: accumulate per-pixel weights and weighted pixel values for
// four displacement offsets at once.
//
// The work-item handles pixel (x, y) taken from global ids 0 and 1. For each
// of the four lanes it derives a patch difference from four corner reads of
// integral_img around (x, y) with half-size p, turns it into a weight
// exp(-patch_diff / (h * h)), and adds
//   - the sum of the four weights to weight[y * width + x]
//   - the weights dotted with the four shifted source pixels to
//     sum[y * width + x]
//
// Parameters:
//   sum, weight   accumulation buffers, indexed as y * width + x; both are
//                 updated with += so earlier contents are kept
//   integral_img  buffer of uint4 values, indexed as row * width + column
//   src           input image; only its .x channel is read
//   width, height extents used for the bounding-box test and as row stride
//   p             patch half-size used for the corner lookups
//   h             divisor parameter; the weight uses h * h
//   dx, dy        the four (dx.k, dy.k) displacement pairs
//
// NOTE(review): x and y are not checked against width/height before the
// buffer updates; presumably the host launches a width x height grid -
// confirm against the caller.
65 "kernel void weight_accum(global float *sum, global float *weight,\n"
66 " global uint4 *integral_img, __read_only image2d_t src,\n"
67 " int width, int height, int p, float h,\n"
68 " int4 dx, int4 dy)\n"
69 "{\n"
70 " // w(x) = integral_img(x-p, y-p) +\n"
71 " // integral_img(x+p, y+p) -\n"
72 " // integral_img(x+p, y-p) -\n"
73 " // integral_img(x-p, y+p)\n"
74 " // total_sum[x] += w(x, y) * src(x + dx, y + dy)\n"
75 " // total_weight += w(x, y)\n"
76 "\n"
77 " int x = get_global_id(0);\n"
78 " int y = get_global_id(1);\n"
// Coordinates of the four shifted pixels, one per vector lane.
79 " int4 xoff = x + dx;\n"
80 " int4 yoff = y + dy;\n"
// Corners default to zero; they stay zero when the patch box is out of range.
81 " uint4 a = 0, b = 0, c = 0, d = 0;\n"
82 " uint4 src_pix = 0;\n"
83 "\n"
84 " // out-of-bounding-box?\n"
// True when the square of half-size p centred on (x, y) leaves the
// [0, width) x [0, height) area.
85 " int oobb = (x - p) < 0 || (y - p) < 0 || (y + p) >= height || (x + p) >= width;\n"
86 "\n"
// Shifted source pixels scaled by 255 and truncated to integers; shifted
// coordinates outside the image are clamped to the edge by the sampler.
87 " src_pix.x = (int)(255 * read_imagef(src, sampler, (int2)(xoff.x, yoff.x)).x);\n"
88 " src_pix.y = (int)(255 * read_imagef(src, sampler, (int2)(xoff.y, yoff.y)).x);\n"
89 " src_pix.z = (int)(255 * read_imagef(src, sampler, (int2)(xoff.z, yoff.z)).x);\n"
90 " src_pix.w = (int)(255 * read_imagef(src, sampler, (int2)(xoff.w, yoff.w)).x);\n"
91 " if (!oobb) {\n"
// a = (x-p, y-p), b = (x-p, y+p), c = (x+p, y-p), d = (x+p, y+p).
92 " a = integral_img[(y - p) * width + x - p];\n"
93 " b = integral_img[(y + p) * width + x - p];\n"
94 " c = integral_img[(y - p) * width + x + p];\n"
95 " d = integral_img[(y + p) * width + x + p];\n"
96 " }\n"
97 "\n"
// Box sum from the four corners. With all corners zero (out-of-box case) the
// difference is 0, so each lane gets weight exp(0) = 1 for nonzero h.
98 " float4 patch_diff = convert_float4(d + a - c - b);\n"
99 " float4 w = native_exp(-patch_diff / (h * h));\n"
100 " float w_sum = w.x + w.y + w.z + w.w;\n"
// Plain (non-atomic) read-modify-write; each work-item only touches the
// element belonging to its own (x, y).
101 " weight[y * width + x] += w_sum;\n"
102 " sum[y * width + x] += dot(w, convert_float4(src_pix));\n"
103 "}\n"
104 "\n"
// average: produce the output pixel from the accumulated sums and weights.
//
// The work-item handles pixel (x, y) taken from global ids 0 and 1. It
// combines the accumulated weighted sum with the source pixel itself (scaled
// by 255 and given weight 1), divides by the total weight plus 1, and divides
// by 255 again before writing.
//
// Parameters:
//   dst     output image; receives (r, 0, 0, 1) at (x, y)
//   src     input image; only its .x channel is read
//   sum     accumulated weighted pixel values, indexed as y * dim.x + x
//   weight  accumulated weights, same indexing
//
// NOTE(review): the reads of weight[] and sum[] happen before the
// x < dim.x && y < dim.y test, so only the image write is guarded. If the
// launch grid can exceed the image size those reads use out-of-range
// indices - confirm the grid size chosen by the host.
105 "kernel void average(__write_only image2d_t dst,\n"
106 " __read_only image2d_t src,\n"
107 " global float *sum, global float *weight) {\n"
108 " int x = get_global_id(0);\n"
109 " int y = get_global_id(1);\n"
// Row stride for the sum/weight buffers is taken from the width of dst.
110 " int2 dim = get_image_dim(dst);\n"
111 "\n"
112 " float w = weight[y * dim.x + x];\n"
113 " float s = sum[y * dim.x + x];\n"
114 " float src_pix = read_imagef(src, sampler, (int2)(x, y)).x;\n"
// (s + 255 * src) / (1 + w) is the weighted mean including the pixel itself
// with weight 1; the final / 255 undoes the 255 scaling.
115 " float r = (s + src_pix * 255) / (1.0f + w) / 255.0f;\n"
116 " if (x < dim.x && y < dim.y)\n"
117 " write_imagef(dst, (int2)(x, y), (float4)(r, 0.0f, 0.0f, 1.0f));\n"
118 "}\n"
119 ;
const char * ff_opencl_source_nlmeans
Definition: nlmeans.c:2