FFmpeg  4.4.4
vf_scale_cuda.ptx.c
Go to the documentation of this file.
1 const char vf_scale_cuda_ptx[] = \
2  "//\n"
3  "// Generated by LLVM NVPTX Back-End\n"
4  "//\n"
5  "\n"
6  ".version 3.2\n"
7  ".target sm_30\n"
8  ".address_size 64\n"
9  "\n"
10  " // .globl Subsample_Nearest_uchar\n"
11  "\n"
12  ".visible .entry Subsample_Nearest_uchar(\n"
13  " .param .u64 Subsample_Nearest_uchar_param_0,\n"
14  " .param .u64 Subsample_Nearest_uchar_param_1,\n"
15  " .param .u32 Subsample_Nearest_uchar_param_2,\n"
16  " .param .u32 Subsample_Nearest_uchar_param_3,\n"
17  " .param .u32 Subsample_Nearest_uchar_param_4,\n"
18  " .param .u32 Subsample_Nearest_uchar_param_5,\n"
19  " .param .u32 Subsample_Nearest_uchar_param_6,\n"
20  " .param .u32 Subsample_Nearest_uchar_param_7\n"
21  ")\n"
22  "{\n"
23  " .reg .pred %p<4>;\n"
24  " .reg .f32 %f<13>;\n"
25  " .reg .b32 %r<19>;\n"
26  " .reg .b64 %rd<7>;\n"
27  "\n"
28  " ld.param.u32 %r4, [Subsample_Nearest_uchar_param_3];\n"
29  " ld.param.u32 %r3, [Subsample_Nearest_uchar_param_2];\n"
30  " // begin inline asm\n"
31  " mov.u32 %r8, %ctaid.x;\n"
32  " // end inline asm\n"
33  " // begin inline asm\n"
34  " mov.u32 %r9, %ctaid.y;\n"
35  " // end inline asm\n"
36  " // begin inline asm\n"
37  " mov.u32 %r10, %ntid.x;\n"
38  " // end inline asm\n"
39  " // begin inline asm\n"
40  " mov.u32 %r11, %ntid.y;\n"
41  " // end inline asm\n"
42  " // begin inline asm\n"
43  " mov.u32 %r12, %tid.x;\n"
44  " // end inline asm\n"
45  " // begin inline asm\n"
46  " mov.u32 %r13, %tid.y;\n"
47  " // end inline asm\n"
48  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
49  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
50  " setp.ge.s32 %p1, %r2, %r4;\n"
51  " setp.ge.s32 %p2, %r1, %r3;\n"
52  " or.pred %p3, %p2, %p1;\n"
53  " @%p3 bra LBB0_2;\n"
54  " ld.param.u32 %r7, [Subsample_Nearest_uchar_param_6];\n"
55  " ld.param.u32 %r6, [Subsample_Nearest_uchar_param_5];\n"
56  " ld.param.u32 %r5, [Subsample_Nearest_uchar_param_4];\n"
57  " ld.param.u64 %rd4, [Subsample_Nearest_uchar_param_0];\n"
58  " ld.param.u64 %rd3, [Subsample_Nearest_uchar_param_1];\n"
59  " cvta.to.global.u64 %rd1, %rd3;\n"
60  " cvt.rn.f32.s32 %f3, %r6;\n"
61  " cvt.rn.f32.s32 %f4, %r3;\n"
62  " div.rn.f32 %f5, %f3, %f4;\n"
63  " cvt.rn.f32.s32 %f6, %r7;\n"
64  " cvt.rn.f32.s32 %f7, %r4;\n"
65  " div.rn.f32 %f8, %f6, %f7;\n"
66  " cvt.rn.f32.s32 %f9, %r1;\n"
67  " add.f32 %f10, %f9, 0f3F000000;\n"
68  " mul.f32 %f1, %f5, %f10;\n"
69  " cvt.rn.f32.s32 %f11, %r2;\n"
70  " add.f32 %f12, %f11, 0f3F000000;\n"
71  " mul.f32 %f2, %f8, %f12;\n"
72  " // begin inline asm\n"
73  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
74  " // end inline asm\n"
75  " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
76  " cvt.s64.s32 %rd5, %r18;\n"
77  " add.s64 %rd6, %rd1, %rd5;\n"
78  " st.global.u8 [%rd6], %r14;\n"
79  "LBB0_2:\n"
80  " ret;\n"
81  "\n"
82  "}\n"
83  " // .globl Subsample_Nearest_uchar2\n"
84  ".visible .entry Subsample_Nearest_uchar2(\n"
85  " .param .u64 Subsample_Nearest_uchar2_param_0,\n"
86  " .param .u64 Subsample_Nearest_uchar2_param_1,\n"
87  " .param .u32 Subsample_Nearest_uchar2_param_2,\n"
88  " .param .u32 Subsample_Nearest_uchar2_param_3,\n"
89  " .param .u32 Subsample_Nearest_uchar2_param_4,\n"
90  " .param .u32 Subsample_Nearest_uchar2_param_5,\n"
91  " .param .u32 Subsample_Nearest_uchar2_param_6,\n"
92  " .param .u32 Subsample_Nearest_uchar2_param_7\n"
93  ")\n"
94  "{\n"
95  " .reg .pred %p<4>;\n"
96  " .reg .b16 %rs<3>;\n"
97  " .reg .f32 %f<13>;\n"
98  " .reg .b32 %r<19>;\n"
99  " .reg .b64 %rd<7>;\n"
100  "\n"
101  " ld.param.u32 %r4, [Subsample_Nearest_uchar2_param_3];\n"
102  " ld.param.u32 %r3, [Subsample_Nearest_uchar2_param_2];\n"
103  " // begin inline asm\n"
104  " mov.u32 %r8, %ctaid.x;\n"
105  " // end inline asm\n"
106  " // begin inline asm\n"
107  " mov.u32 %r9, %ctaid.y;\n"
108  " // end inline asm\n"
109  " // begin inline asm\n"
110  " mov.u32 %r10, %ntid.x;\n"
111  " // end inline asm\n"
112  " // begin inline asm\n"
113  " mov.u32 %r11, %ntid.y;\n"
114  " // end inline asm\n"
115  " // begin inline asm\n"
116  " mov.u32 %r12, %tid.x;\n"
117  " // end inline asm\n"
118  " // begin inline asm\n"
119  " mov.u32 %r13, %tid.y;\n"
120  " // end inline asm\n"
121  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
122  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
123  " setp.ge.s32 %p1, %r2, %r4;\n"
124  " setp.ge.s32 %p2, %r1, %r3;\n"
125  " or.pred %p3, %p2, %p1;\n"
126  " @%p3 bra LBB1_2;\n"
127  " ld.param.u32 %r7, [Subsample_Nearest_uchar2_param_6];\n"
128  " ld.param.u32 %r6, [Subsample_Nearest_uchar2_param_5];\n"
129  " ld.param.u32 %r5, [Subsample_Nearest_uchar2_param_4];\n"
130  " ld.param.u64 %rd4, [Subsample_Nearest_uchar2_param_0];\n"
131  " ld.param.u64 %rd3, [Subsample_Nearest_uchar2_param_1];\n"
132  " cvta.to.global.u64 %rd1, %rd3;\n"
133  " cvt.rn.f32.s32 %f3, %r6;\n"
134  " cvt.rn.f32.s32 %f4, %r3;\n"
135  " div.rn.f32 %f5, %f3, %f4;\n"
136  " cvt.rn.f32.s32 %f6, %r7;\n"
137  " cvt.rn.f32.s32 %f7, %r4;\n"
138  " div.rn.f32 %f8, %f6, %f7;\n"
139  " cvt.rn.f32.s32 %f9, %r1;\n"
140  " add.f32 %f10, %f9, 0f3F000000;\n"
141  " mul.f32 %f1, %f5, %f10;\n"
142  " cvt.rn.f32.s32 %f11, %r2;\n"
143  " add.f32 %f12, %f11, 0f3F000000;\n"
144  " mul.f32 %f2, %f8, %f12;\n"
145  " // begin inline asm\n"
146  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
147  " // end inline asm\n"
148  " cvt.u16.u32 %rs1, %r14;\n"
149  " cvt.u16.u32 %rs2, %r15;\n"
150  " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
151  " mul.wide.s32 %rd5, %r18, 2;\n"
152  " add.s64 %rd6, %rd1, %rd5;\n"
153  " st.global.v2.u8 [%rd6], {%rs1, %rs2};\n"
154  "LBB1_2:\n"
155  " ret;\n"
156  "\n"
157  "}\n"
158  " // .globl Subsample_Nearest_uchar4\n"
159  ".visible .entry Subsample_Nearest_uchar4(\n"
160  " .param .u64 Subsample_Nearest_uchar4_param_0,\n"
161  " .param .u64 Subsample_Nearest_uchar4_param_1,\n"
162  " .param .u32 Subsample_Nearest_uchar4_param_2,\n"
163  " .param .u32 Subsample_Nearest_uchar4_param_3,\n"
164  " .param .u32 Subsample_Nearest_uchar4_param_4,\n"
165  " .param .u32 Subsample_Nearest_uchar4_param_5,\n"
166  " .param .u32 Subsample_Nearest_uchar4_param_6,\n"
167  " .param .u32 Subsample_Nearest_uchar4_param_7\n"
168  ")\n"
169  "{\n"
170  " .reg .pred %p<4>;\n"
171  " .reg .b16 %rs<5>;\n"
172  " .reg .f32 %f<13>;\n"
173  " .reg .b32 %r<19>;\n"
174  " .reg .b64 %rd<7>;\n"
175  "\n"
176  " ld.param.u32 %r4, [Subsample_Nearest_uchar4_param_3];\n"
177  " ld.param.u32 %r3, [Subsample_Nearest_uchar4_param_2];\n"
178  " // begin inline asm\n"
179  " mov.u32 %r8, %ctaid.x;\n"
180  " // end inline asm\n"
181  " // begin inline asm\n"
182  " mov.u32 %r9, %ctaid.y;\n"
183  " // end inline asm\n"
184  " // begin inline asm\n"
185  " mov.u32 %r10, %ntid.x;\n"
186  " // end inline asm\n"
187  " // begin inline asm\n"
188  " mov.u32 %r11, %ntid.y;\n"
189  " // end inline asm\n"
190  " // begin inline asm\n"
191  " mov.u32 %r12, %tid.x;\n"
192  " // end inline asm\n"
193  " // begin inline asm\n"
194  " mov.u32 %r13, %tid.y;\n"
195  " // end inline asm\n"
196  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
197  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
198  " setp.ge.s32 %p1, %r2, %r4;\n"
199  " setp.ge.s32 %p2, %r1, %r3;\n"
200  " or.pred %p3, %p2, %p1;\n"
201  " @%p3 bra LBB2_2;\n"
202  " ld.param.u32 %r7, [Subsample_Nearest_uchar4_param_6];\n"
203  " ld.param.u32 %r6, [Subsample_Nearest_uchar4_param_5];\n"
204  " ld.param.u32 %r5, [Subsample_Nearest_uchar4_param_4];\n"
205  " ld.param.u64 %rd4, [Subsample_Nearest_uchar4_param_0];\n"
206  " ld.param.u64 %rd3, [Subsample_Nearest_uchar4_param_1];\n"
207  " cvta.to.global.u64 %rd1, %rd3;\n"
208  " cvt.rn.f32.s32 %f3, %r6;\n"
209  " cvt.rn.f32.s32 %f4, %r3;\n"
210  " div.rn.f32 %f5, %f3, %f4;\n"
211  " cvt.rn.f32.s32 %f6, %r7;\n"
212  " cvt.rn.f32.s32 %f7, %r4;\n"
213  " div.rn.f32 %f8, %f6, %f7;\n"
214  " cvt.rn.f32.s32 %f9, %r1;\n"
215  " add.f32 %f10, %f9, 0f3F000000;\n"
216  " mul.f32 %f1, %f5, %f10;\n"
217  " cvt.rn.f32.s32 %f11, %r2;\n"
218  " add.f32 %f12, %f11, 0f3F000000;\n"
219  " mul.f32 %f2, %f8, %f12;\n"
220  " // begin inline asm\n"
221  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
222  " // end inline asm\n"
223  " cvt.u16.u32 %rs1, %r14;\n"
224  " cvt.u16.u32 %rs2, %r15;\n"
225  " cvt.u16.u32 %rs3, %r16;\n"
226  " cvt.u16.u32 %rs4, %r17;\n"
227  " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
228  " mul.wide.s32 %rd5, %r18, 4;\n"
229  " add.s64 %rd6, %rd1, %rd5;\n"
230  " st.global.v4.u8 [%rd6], {%rs1, %rs2, %rs3, %rs4};\n"
231  "LBB2_2:\n"
232  " ret;\n"
233  "\n"
234  "}\n"
235  " // .globl Subsample_Nearest_ushort\n"
236  ".visible .entry Subsample_Nearest_ushort(\n"
237  " .param .u64 Subsample_Nearest_ushort_param_0,\n"
238  " .param .u64 Subsample_Nearest_ushort_param_1,\n"
239  " .param .u32 Subsample_Nearest_ushort_param_2,\n"
240  " .param .u32 Subsample_Nearest_ushort_param_3,\n"
241  " .param .u32 Subsample_Nearest_ushort_param_4,\n"
242  " .param .u32 Subsample_Nearest_ushort_param_5,\n"
243  " .param .u32 Subsample_Nearest_ushort_param_6,\n"
244  " .param .u32 Subsample_Nearest_ushort_param_7\n"
245  ")\n"
246  "{\n"
247  " .reg .pred %p<4>;\n"
248  " .reg .f32 %f<13>;\n"
249  " .reg .b32 %r<19>;\n"
250  " .reg .b64 %rd<7>;\n"
251  "\n"
252  " ld.param.u32 %r4, [Subsample_Nearest_ushort_param_3];\n"
253  " ld.param.u32 %r3, [Subsample_Nearest_ushort_param_2];\n"
254  " // begin inline asm\n"
255  " mov.u32 %r8, %ctaid.x;\n"
256  " // end inline asm\n"
257  " // begin inline asm\n"
258  " mov.u32 %r9, %ctaid.y;\n"
259  " // end inline asm\n"
260  " // begin inline asm\n"
261  " mov.u32 %r10, %ntid.x;\n"
262  " // end inline asm\n"
263  " // begin inline asm\n"
264  " mov.u32 %r11, %ntid.y;\n"
265  " // end inline asm\n"
266  " // begin inline asm\n"
267  " mov.u32 %r12, %tid.x;\n"
268  " // end inline asm\n"
269  " // begin inline asm\n"
270  " mov.u32 %r13, %tid.y;\n"
271  " // end inline asm\n"
272  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
273  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
274  " setp.ge.s32 %p1, %r2, %r4;\n"
275  " setp.ge.s32 %p2, %r1, %r3;\n"
276  " or.pred %p3, %p2, %p1;\n"
277  " @%p3 bra LBB3_2;\n"
278  " ld.param.u32 %r7, [Subsample_Nearest_ushort_param_6];\n"
279  " ld.param.u32 %r6, [Subsample_Nearest_ushort_param_5];\n"
280  " ld.param.u32 %r5, [Subsample_Nearest_ushort_param_4];\n"
281  " ld.param.u64 %rd4, [Subsample_Nearest_ushort_param_0];\n"
282  " ld.param.u64 %rd3, [Subsample_Nearest_ushort_param_1];\n"
283  " cvta.to.global.u64 %rd1, %rd3;\n"
284  " cvt.rn.f32.s32 %f3, %r6;\n"
285  " cvt.rn.f32.s32 %f4, %r3;\n"
286  " div.rn.f32 %f5, %f3, %f4;\n"
287  " cvt.rn.f32.s32 %f6, %r7;\n"
288  " cvt.rn.f32.s32 %f7, %r4;\n"
289  " div.rn.f32 %f8, %f6, %f7;\n"
290  " cvt.rn.f32.s32 %f9, %r1;\n"
291  " add.f32 %f10, %f9, 0f3F000000;\n"
292  " mul.f32 %f1, %f5, %f10;\n"
293  " cvt.rn.f32.s32 %f11, %r2;\n"
294  " add.f32 %f12, %f11, 0f3F000000;\n"
295  " mul.f32 %f2, %f8, %f12;\n"
296  " // begin inline asm\n"
297  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
298  " // end inline asm\n"
299  " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
300  " mul.wide.s32 %rd5, %r18, 2;\n"
301  " add.s64 %rd6, %rd1, %rd5;\n"
302  " st.global.u16 [%rd6], %r14;\n"
303  "LBB3_2:\n"
304  " ret;\n"
305  "\n"
306  "}\n"
307  " // .globl Subsample_Nearest_ushort2\n"
308  ".visible .entry Subsample_Nearest_ushort2(\n"
309  " .param .u64 Subsample_Nearest_ushort2_param_0,\n"
310  " .param .u64 Subsample_Nearest_ushort2_param_1,\n"
311  " .param .u32 Subsample_Nearest_ushort2_param_2,\n"
312  " .param .u32 Subsample_Nearest_ushort2_param_3,\n"
313  " .param .u32 Subsample_Nearest_ushort2_param_4,\n"
314  " .param .u32 Subsample_Nearest_ushort2_param_5,\n"
315  " .param .u32 Subsample_Nearest_ushort2_param_6,\n"
316  " .param .u32 Subsample_Nearest_ushort2_param_7\n"
317  ")\n"
318  "{\n"
319  " .reg .pred %p<4>;\n"
320  " .reg .b16 %rs<3>;\n"
321  " .reg .f32 %f<13>;\n"
322  " .reg .b32 %r<19>;\n"
323  " .reg .b64 %rd<7>;\n"
324  "\n"
325  " ld.param.u32 %r4, [Subsample_Nearest_ushort2_param_3];\n"
326  " ld.param.u32 %r3, [Subsample_Nearest_ushort2_param_2];\n"
327  " // begin inline asm\n"
328  " mov.u32 %r8, %ctaid.x;\n"
329  " // end inline asm\n"
330  " // begin inline asm\n"
331  " mov.u32 %r9, %ctaid.y;\n"
332  " // end inline asm\n"
333  " // begin inline asm\n"
334  " mov.u32 %r10, %ntid.x;\n"
335  " // end inline asm\n"
336  " // begin inline asm\n"
337  " mov.u32 %r11, %ntid.y;\n"
338  " // end inline asm\n"
339  " // begin inline asm\n"
340  " mov.u32 %r12, %tid.x;\n"
341  " // end inline asm\n"
342  " // begin inline asm\n"
343  " mov.u32 %r13, %tid.y;\n"
344  " // end inline asm\n"
345  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
346  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
347  " setp.ge.s32 %p1, %r2, %r4;\n"
348  " setp.ge.s32 %p2, %r1, %r3;\n"
349  " or.pred %p3, %p2, %p1;\n"
350  " @%p3 bra LBB4_2;\n"
351  " ld.param.u32 %r7, [Subsample_Nearest_ushort2_param_6];\n"
352  " ld.param.u32 %r6, [Subsample_Nearest_ushort2_param_5];\n"
353  " ld.param.u32 %r5, [Subsample_Nearest_ushort2_param_4];\n"
354  " ld.param.u64 %rd4, [Subsample_Nearest_ushort2_param_0];\n"
355  " ld.param.u64 %rd3, [Subsample_Nearest_ushort2_param_1];\n"
356  " cvta.to.global.u64 %rd1, %rd3;\n"
357  " cvt.rn.f32.s32 %f3, %r6;\n"
358  " cvt.rn.f32.s32 %f4, %r3;\n"
359  " div.rn.f32 %f5, %f3, %f4;\n"
360  " cvt.rn.f32.s32 %f6, %r7;\n"
361  " cvt.rn.f32.s32 %f7, %r4;\n"
362  " div.rn.f32 %f8, %f6, %f7;\n"
363  " cvt.rn.f32.s32 %f9, %r1;\n"
364  " add.f32 %f10, %f9, 0f3F000000;\n"
365  " mul.f32 %f1, %f5, %f10;\n"
366  " cvt.rn.f32.s32 %f11, %r2;\n"
367  " add.f32 %f12, %f11, 0f3F000000;\n"
368  " mul.f32 %f2, %f8, %f12;\n"
369  " // begin inline asm\n"
370  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
371  " // end inline asm\n"
372  " cvt.u16.u32 %rs1, %r14;\n"
373  " cvt.u16.u32 %rs2, %r15;\n"
374  " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
375  " mul.wide.s32 %rd5, %r18, 4;\n"
376  " add.s64 %rd6, %rd1, %rd5;\n"
377  " st.global.v2.u16 [%rd6], {%rs1, %rs2};\n"
378  "LBB4_2:\n"
379  " ret;\n"
380  "\n"
381  "}\n"
382  " // .globl Subsample_Nearest_ushort4\n"
383  ".visible .entry Subsample_Nearest_ushort4(\n"
384  " .param .u64 Subsample_Nearest_ushort4_param_0,\n"
385  " .param .u64 Subsample_Nearest_ushort4_param_1,\n"
386  " .param .u32 Subsample_Nearest_ushort4_param_2,\n"
387  " .param .u32 Subsample_Nearest_ushort4_param_3,\n"
388  " .param .u32 Subsample_Nearest_ushort4_param_4,\n"
389  " .param .u32 Subsample_Nearest_ushort4_param_5,\n"
390  " .param .u32 Subsample_Nearest_ushort4_param_6,\n"
391  " .param .u32 Subsample_Nearest_ushort4_param_7\n"
392  ")\n"
393  "{\n"
394  " .reg .pred %p<4>;\n"
395  " .reg .b16 %rs<5>;\n"
396  " .reg .f32 %f<13>;\n"
397  " .reg .b32 %r<19>;\n"
398  " .reg .b64 %rd<7>;\n"
399  "\n"
400  " ld.param.u32 %r4, [Subsample_Nearest_ushort4_param_3];\n"
401  " ld.param.u32 %r3, [Subsample_Nearest_ushort4_param_2];\n"
402  " // begin inline asm\n"
403  " mov.u32 %r8, %ctaid.x;\n"
404  " // end inline asm\n"
405  " // begin inline asm\n"
406  " mov.u32 %r9, %ctaid.y;\n"
407  " // end inline asm\n"
408  " // begin inline asm\n"
409  " mov.u32 %r10, %ntid.x;\n"
410  " // end inline asm\n"
411  " // begin inline asm\n"
412  " mov.u32 %r11, %ntid.y;\n"
413  " // end inline asm\n"
414  " // begin inline asm\n"
415  " mov.u32 %r12, %tid.x;\n"
416  " // end inline asm\n"
417  " // begin inline asm\n"
418  " mov.u32 %r13, %tid.y;\n"
419  " // end inline asm\n"
420  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
421  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
422  " setp.ge.s32 %p1, %r2, %r4;\n"
423  " setp.ge.s32 %p2, %r1, %r3;\n"
424  " or.pred %p3, %p2, %p1;\n"
425  " @%p3 bra LBB5_2;\n"
426  " ld.param.u32 %r7, [Subsample_Nearest_ushort4_param_6];\n"
427  " ld.param.u32 %r6, [Subsample_Nearest_ushort4_param_5];\n"
428  " ld.param.u32 %r5, [Subsample_Nearest_ushort4_param_4];\n"
429  " ld.param.u64 %rd4, [Subsample_Nearest_ushort4_param_0];\n"
430  " ld.param.u64 %rd3, [Subsample_Nearest_ushort4_param_1];\n"
431  " cvta.to.global.u64 %rd1, %rd3;\n"
432  " cvt.rn.f32.s32 %f3, %r6;\n"
433  " cvt.rn.f32.s32 %f4, %r3;\n"
434  " div.rn.f32 %f5, %f3, %f4;\n"
435  " cvt.rn.f32.s32 %f6, %r7;\n"
436  " cvt.rn.f32.s32 %f7, %r4;\n"
437  " div.rn.f32 %f8, %f6, %f7;\n"
438  " cvt.rn.f32.s32 %f9, %r1;\n"
439  " add.f32 %f10, %f9, 0f3F000000;\n"
440  " mul.f32 %f1, %f5, %f10;\n"
441  " cvt.rn.f32.s32 %f11, %r2;\n"
442  " add.f32 %f12, %f11, 0f3F000000;\n"
443  " mul.f32 %f2, %f8, %f12;\n"
444  " // begin inline asm\n"
445  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f1, %f2}];\n"
446  " // end inline asm\n"
447  " cvt.u16.u32 %rs1, %r14;\n"
448  " cvt.u16.u32 %rs2, %r15;\n"
449  " cvt.u16.u32 %rs3, %r16;\n"
450  " cvt.u16.u32 %rs4, %r17;\n"
451  " mad.lo.s32 %r18, %r2, %r5, %r1;\n"
452  " mul.wide.s32 %rd5, %r18, 8;\n"
453  " add.s64 %rd6, %rd1, %rd5;\n"
454  " st.global.v4.u16 [%rd6], {%rs1, %rs2, %rs3, %rs4};\n"
455  "LBB5_2:\n"
456  " ret;\n"
457  "\n"
458  "}\n"
459  " // .globl Subsample_Bilinear_uchar\n"
460  ".visible .entry Subsample_Bilinear_uchar(\n"
461  " .param .u64 Subsample_Bilinear_uchar_param_0,\n"
462  " .param .u64 Subsample_Bilinear_uchar_param_1,\n"
463  " .param .u32 Subsample_Bilinear_uchar_param_2,\n"
464  " .param .u32 Subsample_Bilinear_uchar_param_3,\n"
465  " .param .u32 Subsample_Bilinear_uchar_param_4,\n"
466  " .param .u32 Subsample_Bilinear_uchar_param_5,\n"
467  " .param .u32 Subsample_Bilinear_uchar_param_6,\n"
468  " .param .u32 Subsample_Bilinear_uchar_param_7\n"
469  ")\n"
470  "{\n"
471  " .reg .pred %p<4>;\n"
472  " .reg .f32 %f<33>;\n"
473  " .reg .b32 %r<40>;\n"
474  " .reg .b64 %rd<10>;\n"
475  "\n"
476  " ld.param.u32 %r4, [Subsample_Bilinear_uchar_param_3];\n"
477  " ld.param.u32 %r3, [Subsample_Bilinear_uchar_param_2];\n"
478  " // begin inline asm\n"
479  " mov.u32 %r8, %ctaid.x;\n"
480  " // end inline asm\n"
481  " // begin inline asm\n"
482  " mov.u32 %r9, %ctaid.y;\n"
483  " // end inline asm\n"
484  " // begin inline asm\n"
485  " mov.u32 %r10, %ntid.x;\n"
486  " // end inline asm\n"
487  " // begin inline asm\n"
488  " mov.u32 %r11, %ntid.y;\n"
489  " // end inline asm\n"
490  " // begin inline asm\n"
491  " mov.u32 %r12, %tid.x;\n"
492  " // end inline asm\n"
493  " // begin inline asm\n"
494  " mov.u32 %r13, %tid.y;\n"
495  " // end inline asm\n"
496  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
497  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
498  " setp.ge.s32 %p1, %r2, %r4;\n"
499  " setp.ge.s32 %p2, %r1, %r3;\n"
500  " or.pred %p3, %p2, %p1;\n"
501  " @%p3 bra LBB6_2;\n"
502  " ld.param.u32 %r7, [Subsample_Bilinear_uchar_param_6];\n"
503  " ld.param.u32 %r6, [Subsample_Bilinear_uchar_param_5];\n"
504  " ld.param.u32 %r5, [Subsample_Bilinear_uchar_param_4];\n"
505  " ld.param.u64 %rd4, [Subsample_Bilinear_uchar_param_0];\n"
506  " ld.param.u64 %rd3, [Subsample_Bilinear_uchar_param_1];\n"
507  " cvta.to.global.u64 %rd1, %rd3;\n"
508  " cvt.rn.f32.s32 %f9, %r6;\n"
509  " cvt.rn.f32.s32 %f10, %r3;\n"
510  " div.rn.f32 %f11, %f9, %f10;\n"
511  " cvt.rn.f32.s32 %f12, %r7;\n"
512  " cvt.rn.f32.s32 %f13, %r4;\n"
513  " div.rn.f32 %f14, %f12, %f13;\n"
514  " cvt.rn.f32.s32 %f15, %r1;\n"
515  " add.f32 %f16, %f15, 0f3F000000;\n"
516  " cvt.rn.f32.s32 %f17, %r2;\n"
517  " add.f32 %f18, %f17, 0f3F000000;\n"
518  " add.f32 %f19, %f11, 0fBF800000;\n"
519  " mul.f32 %f20, %f19, 0f3F000000;\n"
520  " max.f32 %f21, %f20, 0f00000000;\n"
521  " min.f32 %f22, %f21, 0f3F800000;\n"
522  " add.f32 %f23, %f14, 0fBF800000;\n"
523  " mul.f32 %f24, %f23, 0f3F000000;\n"
524  " max.f32 %f25, %f24, 0f00000000;\n"
525  " min.f32 %f26, %f25, 0f3F800000;\n"
526  " add.f32 %f27, %f22, 0f3F000000;\n"
527  " div.rn.f32 %f28, %f22, %f27;\n"
528  " add.f32 %f29, %f26, 0f3F000000;\n"
529  " div.rn.f32 %f30, %f26, %f29;\n"
530  " neg.f32 %f31, %f28;\n"
531  " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
532  " neg.f32 %f32, %f30;\n"
533  " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
534  " // begin inline asm\n"
535  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
536  " // end inline asm\n"
537  " and.b32 %r30, %r14, 255;\n"
538  " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
539  " // begin inline asm\n"
540  " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
541  " // end inline asm\n"
542  " and.b32 %r31, %r18, 255;\n"
543  " add.s32 %r32, %r30, %r31;\n"
544  " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
545  " // begin inline asm\n"
546  " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
547  " // end inline asm\n"
548  " and.b32 %r33, %r22, 255;\n"
549  " add.s32 %r34, %r32, %r33;\n"
550  " // begin inline asm\n"
551  " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
552  " // end inline asm\n"
553  " and.b32 %r35, %r26, 255;\n"
554  " add.s32 %r36, %r34, %r35;\n"
555  " add.s32 %r37, %r36, 2;\n"
556  " mad.lo.s32 %r38, %r2, %r5, %r1;\n"
557  " cvt.s64.s32 %rd8, %r38;\n"
558  " add.s64 %rd9, %rd1, %rd8;\n"
559  " shr.u32 %r39, %r37, 2;\n"
560  " st.global.u8 [%rd9], %r39;\n"
561  "LBB6_2:\n"
562  " ret;\n"
563  "\n"
564  "}\n"
565  " // .globl Subsample_Bilinear_uchar2\n"
566  ".visible .entry Subsample_Bilinear_uchar2(\n"
567  " .param .u64 Subsample_Bilinear_uchar2_param_0,\n"
568  " .param .u64 Subsample_Bilinear_uchar2_param_1,\n"
569  " .param .u32 Subsample_Bilinear_uchar2_param_2,\n"
570  " .param .u32 Subsample_Bilinear_uchar2_param_3,\n"
571  " .param .u32 Subsample_Bilinear_uchar2_param_4,\n"
572  " .param .u32 Subsample_Bilinear_uchar2_param_5,\n"
573  " .param .u32 Subsample_Bilinear_uchar2_param_6,\n"
574  " .param .u32 Subsample_Bilinear_uchar2_param_7\n"
575  ")\n"
576  "{\n"
577  " .reg .pred %p<4>;\n"
578  " .reg .b16 %rs<3>;\n"
579  " .reg .f32 %f<33>;\n"
580  " .reg .b32 %r<49>;\n"
581  " .reg .b64 %rd<10>;\n"
582  "\n"
583  " ld.param.u32 %r4, [Subsample_Bilinear_uchar2_param_3];\n"
584  " ld.param.u32 %r3, [Subsample_Bilinear_uchar2_param_2];\n"
585  " // begin inline asm\n"
586  " mov.u32 %r8, %ctaid.x;\n"
587  " // end inline asm\n"
588  " // begin inline asm\n"
589  " mov.u32 %r9, %ctaid.y;\n"
590  " // end inline asm\n"
591  " // begin inline asm\n"
592  " mov.u32 %r10, %ntid.x;\n"
593  " // end inline asm\n"
594  " // begin inline asm\n"
595  " mov.u32 %r11, %ntid.y;\n"
596  " // end inline asm\n"
597  " // begin inline asm\n"
598  " mov.u32 %r12, %tid.x;\n"
599  " // end inline asm\n"
600  " // begin inline asm\n"
601  " mov.u32 %r13, %tid.y;\n"
602  " // end inline asm\n"
603  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
604  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
605  " setp.ge.s32 %p1, %r2, %r4;\n"
606  " setp.ge.s32 %p2, %r1, %r3;\n"
607  " or.pred %p3, %p2, %p1;\n"
608  " @%p3 bra LBB7_2;\n"
609  " ld.param.u32 %r7, [Subsample_Bilinear_uchar2_param_6];\n"
610  " ld.param.u32 %r6, [Subsample_Bilinear_uchar2_param_5];\n"
611  " ld.param.u32 %r5, [Subsample_Bilinear_uchar2_param_4];\n"
612  " ld.param.u64 %rd4, [Subsample_Bilinear_uchar2_param_0];\n"
613  " ld.param.u64 %rd3, [Subsample_Bilinear_uchar2_param_1];\n"
614  " cvta.to.global.u64 %rd1, %rd3;\n"
615  " cvt.rn.f32.s32 %f9, %r6;\n"
616  " cvt.rn.f32.s32 %f10, %r3;\n"
617  " div.rn.f32 %f11, %f9, %f10;\n"
618  " cvt.rn.f32.s32 %f12, %r7;\n"
619  " cvt.rn.f32.s32 %f13, %r4;\n"
620  " div.rn.f32 %f14, %f12, %f13;\n"
621  " cvt.rn.f32.s32 %f15, %r1;\n"
622  " add.f32 %f16, %f15, 0f3F000000;\n"
623  " cvt.rn.f32.s32 %f17, %r2;\n"
624  " add.f32 %f18, %f17, 0f3F000000;\n"
625  " add.f32 %f19, %f11, 0fBF800000;\n"
626  " mul.f32 %f20, %f19, 0f3F000000;\n"
627  " max.f32 %f21, %f20, 0f00000000;\n"
628  " min.f32 %f22, %f21, 0f3F800000;\n"
629  " add.f32 %f23, %f14, 0fBF800000;\n"
630  " mul.f32 %f24, %f23, 0f3F000000;\n"
631  " max.f32 %f25, %f24, 0f00000000;\n"
632  " min.f32 %f26, %f25, 0f3F800000;\n"
633  " add.f32 %f27, %f22, 0f3F000000;\n"
634  " div.rn.f32 %f28, %f22, %f27;\n"
635  " add.f32 %f29, %f26, 0f3F000000;\n"
636  " div.rn.f32 %f30, %f26, %f29;\n"
637  " neg.f32 %f31, %f28;\n"
638  " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
639  " neg.f32 %f32, %f30;\n"
640  " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
641  " // begin inline asm\n"
642  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
643  " // end inline asm\n"
644  " and.b32 %r30, %r14, 255;\n"
645  " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
646  " // begin inline asm\n"
647  " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
648  " // end inline asm\n"
649  " and.b32 %r31, %r18, 255;\n"
650  " add.s32 %r32, %r30, %r31;\n"
651  " and.b32 %r33, %r15, 255;\n"
652  " and.b32 %r34, %r19, 255;\n"
653  " add.s32 %r35, %r33, %r34;\n"
654  " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
655  " // begin inline asm\n"
656  " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
657  " // end inline asm\n"
658  " and.b32 %r36, %r22, 255;\n"
659  " add.s32 %r37, %r32, %r36;\n"
660  " and.b32 %r38, %r23, 255;\n"
661  " add.s32 %r39, %r35, %r38;\n"
662  " // begin inline asm\n"
663  " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
664  " // end inline asm\n"
665  " and.b32 %r40, %r26, 255;\n"
666  " add.s32 %r41, %r37, %r40;\n"
667  " and.b32 %r42, %r27, 255;\n"
668  " add.s32 %r43, %r39, %r42;\n"
669  " add.s32 %r44, %r41, 2;\n"
670  " add.s32 %r45, %r43, 2;\n"
671  " mad.lo.s32 %r46, %r2, %r5, %r1;\n"
672  " shr.u32 %r47, %r44, 2;\n"
673  " shr.u32 %r48, %r45, 2;\n"
674  " cvt.u16.u32 %rs1, %r47;\n"
675  " mul.wide.s32 %rd8, %r46, 2;\n"
676  " add.s64 %rd9, %rd1, %rd8;\n"
677  " cvt.u16.u32 %rs2, %r48;\n"
678  " st.global.v2.u8 [%rd9], {%rs1, %rs2};\n"
679  "LBB7_2:\n"
680  " ret;\n"
681  "\n"
682  "}\n"
683  " // .globl Subsample_Bilinear_uchar4\n"
684  ".visible .entry Subsample_Bilinear_uchar4(\n"
685  " .param .u64 Subsample_Bilinear_uchar4_param_0,\n"
686  " .param .u64 Subsample_Bilinear_uchar4_param_1,\n"
687  " .param .u32 Subsample_Bilinear_uchar4_param_2,\n"
688  " .param .u32 Subsample_Bilinear_uchar4_param_3,\n"
689  " .param .u32 Subsample_Bilinear_uchar4_param_4,\n"
690  " .param .u32 Subsample_Bilinear_uchar4_param_5,\n"
691  " .param .u32 Subsample_Bilinear_uchar4_param_6,\n"
692  " .param .u32 Subsample_Bilinear_uchar4_param_7\n"
693  ")\n"
694  "{\n"
695  " .reg .pred %p<4>;\n"
696  " .reg .b16 %rs<5>;\n"
697  " .reg .f32 %f<33>;\n"
698  " .reg .b32 %r<67>;\n"
699  " .reg .b64 %rd<10>;\n"
700  "\n"
701  " ld.param.u32 %r4, [Subsample_Bilinear_uchar4_param_3];\n"
702  " ld.param.u32 %r3, [Subsample_Bilinear_uchar4_param_2];\n"
703  " // begin inline asm\n"
704  " mov.u32 %r8, %ctaid.x;\n"
705  " // end inline asm\n"
706  " // begin inline asm\n"
707  " mov.u32 %r9, %ctaid.y;\n"
708  " // end inline asm\n"
709  " // begin inline asm\n"
710  " mov.u32 %r10, %ntid.x;\n"
711  " // end inline asm\n"
712  " // begin inline asm\n"
713  " mov.u32 %r11, %ntid.y;\n"
714  " // end inline asm\n"
715  " // begin inline asm\n"
716  " mov.u32 %r12, %tid.x;\n"
717  " // end inline asm\n"
718  " // begin inline asm\n"
719  " mov.u32 %r13, %tid.y;\n"
720  " // end inline asm\n"
721  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
722  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
723  " setp.ge.s32 %p1, %r2, %r4;\n"
724  " setp.ge.s32 %p2, %r1, %r3;\n"
725  " or.pred %p3, %p2, %p1;\n"
726  " @%p3 bra LBB8_2;\n"
727  " ld.param.u32 %r7, [Subsample_Bilinear_uchar4_param_6];\n"
728  " ld.param.u32 %r6, [Subsample_Bilinear_uchar4_param_5];\n"
729  " ld.param.u32 %r5, [Subsample_Bilinear_uchar4_param_4];\n"
730  " ld.param.u64 %rd4, [Subsample_Bilinear_uchar4_param_0];\n"
731  " ld.param.u64 %rd3, [Subsample_Bilinear_uchar4_param_1];\n"
732  " cvta.to.global.u64 %rd1, %rd3;\n"
733  " cvt.rn.f32.s32 %f9, %r6;\n"
734  " cvt.rn.f32.s32 %f10, %r3;\n"
735  " div.rn.f32 %f11, %f9, %f10;\n"
736  " cvt.rn.f32.s32 %f12, %r7;\n"
737  " cvt.rn.f32.s32 %f13, %r4;\n"
738  " div.rn.f32 %f14, %f12, %f13;\n"
739  " cvt.rn.f32.s32 %f15, %r1;\n"
740  " add.f32 %f16, %f15, 0f3F000000;\n"
741  " cvt.rn.f32.s32 %f17, %r2;\n"
742  " add.f32 %f18, %f17, 0f3F000000;\n"
743  " add.f32 %f19, %f11, 0fBF800000;\n"
744  " mul.f32 %f20, %f19, 0f3F000000;\n"
745  " max.f32 %f21, %f20, 0f00000000;\n"
746  " min.f32 %f22, %f21, 0f3F800000;\n"
747  " add.f32 %f23, %f14, 0fBF800000;\n"
748  " mul.f32 %f24, %f23, 0f3F000000;\n"
749  " max.f32 %f25, %f24, 0f00000000;\n"
750  " min.f32 %f26, %f25, 0f3F800000;\n"
751  " add.f32 %f27, %f22, 0f3F000000;\n"
752  " div.rn.f32 %f28, %f22, %f27;\n"
753  " add.f32 %f29, %f26, 0f3F000000;\n"
754  " div.rn.f32 %f30, %f26, %f29;\n"
755  " neg.f32 %f31, %f28;\n"
756  " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
757  " neg.f32 %f32, %f30;\n"
758  " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
759  " // begin inline asm\n"
760  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
761  " // end inline asm\n"
762  " and.b32 %r30, %r14, 255;\n"
763  " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
764  " // begin inline asm\n"
765  " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
766  " // end inline asm\n"
767  " and.b32 %r31, %r18, 255;\n"
768  " add.s32 %r32, %r30, %r31;\n"
769  " and.b32 %r33, %r15, 255;\n"
770  " and.b32 %r34, %r19, 255;\n"
771  " add.s32 %r35, %r33, %r34;\n"
772  " and.b32 %r36, %r16, 255;\n"
773  " and.b32 %r37, %r20, 255;\n"
774  " add.s32 %r38, %r36, %r37;\n"
775  " and.b32 %r39, %r17, 255;\n"
776  " and.b32 %r40, %r21, 255;\n"
777  " add.s32 %r41, %r39, %r40;\n"
778  " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
779  " // begin inline asm\n"
780  " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
781  " // end inline asm\n"
782  " and.b32 %r42, %r22, 255;\n"
783  " add.s32 %r43, %r32, %r42;\n"
784  " and.b32 %r44, %r23, 255;\n"
785  " add.s32 %r45, %r35, %r44;\n"
786  " and.b32 %r46, %r24, 255;\n"
787  " add.s32 %r47, %r38, %r46;\n"
788  " and.b32 %r48, %r25, 255;\n"
789  " add.s32 %r49, %r41, %r48;\n"
790  " // begin inline asm\n"
791  " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
792  " // end inline asm\n"
793  " and.b32 %r50, %r26, 255;\n"
794  " add.s32 %r51, %r43, %r50;\n"
795  " and.b32 %r52, %r27, 255;\n"
796  " add.s32 %r53, %r45, %r52;\n"
797  " and.b32 %r54, %r28, 255;\n"
798  " add.s32 %r55, %r47, %r54;\n"
799  " and.b32 %r56, %r29, 255;\n"
800  " add.s32 %r57, %r49, %r56;\n"
801  " add.s32 %r58, %r51, 2;\n"
802  " add.s32 %r59, %r53, 2;\n"
803  " add.s32 %r60, %r55, 2;\n"
804  " add.s32 %r61, %r57, 2;\n"
805  " mad.lo.s32 %r62, %r2, %r5, %r1;\n"
806  " shr.u32 %r63, %r58, 2;\n"
807  " shr.u32 %r64, %r59, 2;\n"
808  " shr.u32 %r65, %r60, 2;\n"
809  " shr.u32 %r66, %r61, 2;\n"
810  " cvt.u16.u32 %rs1, %r63;\n"
811  " mul.wide.s32 %rd8, %r62, 4;\n"
812  " add.s64 %rd9, %rd1, %rd8;\n"
813  " cvt.u16.u32 %rs2, %r64;\n"
814  " cvt.u16.u32 %rs3, %r65;\n"
815  " cvt.u16.u32 %rs4, %r66;\n"
816  " st.global.v4.u8 [%rd9], {%rs1, %rs2, %rs3, %rs4};\n"
817  "LBB8_2:\n"
818  " ret;\n"
819  "\n"
820  "}\n"
/*
 * PTX entry Subsample_Bilinear_ushort.
 *
 * Each thread handles one output element at
 *   x = ntid.x * ctaid.x + tid.x,  y = ntid.y * ctaid.y + tid.y.
 * It fetches four texels around a scaled sample position, adds the low
 * 16 bits of the first component of each fetch, and stores the rounded
 * average (sum + 2) >> 2 as one 16-bit value in global memory.
 *
 * Parameters, as used by the instructions below:
 *   param_0 (u64): texture operand of the tex.2d fetches.
 *   param_1 (u64): output base address (converted with cvta.to.global).
 *   param_2 (u32): x bound (threads with x >= param_2 exit) and divisor
 *                  of the x scale factor.
 *   param_3 (u32): y bound (threads with y >= param_3 exit) and divisor
 *                  of the y scale factor.
 *   param_4 (u32): output row stride in elements (index = y * param_4 + x).
 *   param_5 (u32): numerator of the x scale factor.
 *   param_6 (u32): numerator of the y scale factor.
 *   param_7 (u32): declared but never read in this entry.
 *
 * NOTE(review): param_2/param_3 look like the destination width/height and
 * param_5/param_6 like the source width/height; confirm against the CUDA
 * source this PTX was generated from.
 */
821  " // .globl Subsample_Bilinear_ushort\n"
822  ".visible .entry Subsample_Bilinear_ushort(\n"
823  " .param .u64 Subsample_Bilinear_ushort_param_0,\n"
824  " .param .u64 Subsample_Bilinear_ushort_param_1,\n"
825  " .param .u32 Subsample_Bilinear_ushort_param_2,\n"
826  " .param .u32 Subsample_Bilinear_ushort_param_3,\n"
827  " .param .u32 Subsample_Bilinear_ushort_param_4,\n"
828  " .param .u32 Subsample_Bilinear_ushort_param_5,\n"
829  " .param .u32 Subsample_Bilinear_ushort_param_6,\n"
830  " .param .u32 Subsample_Bilinear_ushort_param_7\n"
831  ")\n"
832  "{\n"
833  " .reg .pred %p<4>;\n"
834  " .reg .f32 %f<33>;\n"
835  " .reg .b32 %r<40>;\n"
836  " .reg .b64 %rd<10>;\n"
837  "\n"
838  " ld.param.u32 %r4, [Subsample_Bilinear_ushort_param_3];\n"
839  " ld.param.u32 %r3, [Subsample_Bilinear_ushort_param_2];\n"
/* Read block index, block size and thread index for both axes. */
840  " // begin inline asm\n"
841  " mov.u32 %r8, %ctaid.x;\n"
842  " // end inline asm\n"
843  " // begin inline asm\n"
844  " mov.u32 %r9, %ctaid.y;\n"
845  " // end inline asm\n"
846  " // begin inline asm\n"
847  " mov.u32 %r10, %ntid.x;\n"
848  " // end inline asm\n"
849  " // begin inline asm\n"
850  " mov.u32 %r11, %ntid.y;\n"
851  " // end inline asm\n"
852  " // begin inline asm\n"
853  " mov.u32 %r12, %tid.x;\n"
854  " // end inline asm\n"
855  " // begin inline asm\n"
856  " mov.u32 %r13, %tid.y;\n"
857  " // end inline asm\n"
/* %r1 = x, %r2 = y; branch to the final ret when x >= param_2 or y >= param_3. */
858  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
859  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
860  " setp.ge.s32 %p1, %r2, %r4;\n"
861  " setp.ge.s32 %p2, %r1, %r3;\n"
862  " or.pred %p3, %p2, %p1;\n"
863  " @%p3 bra LBB9_2;\n"
864  " ld.param.u32 %r7, [Subsample_Bilinear_ushort_param_6];\n"
865  " ld.param.u32 %r6, [Subsample_Bilinear_ushort_param_5];\n"
866  " ld.param.u32 %r5, [Subsample_Bilinear_ushort_param_4];\n"
867  " ld.param.u64 %rd4, [Subsample_Bilinear_ushort_param_0];\n"
868  " ld.param.u64 %rd3, [Subsample_Bilinear_ushort_param_1];\n"
869  " cvta.to.global.u64 %rd1, %rd3;\n"
/* Scale factors: %f11 = param_5 / param_2 (x), %f14 = param_6 / param_3 (y). */
870  " cvt.rn.f32.s32 %f9, %r6;\n"
871  " cvt.rn.f32.s32 %f10, %r3;\n"
872  " div.rn.f32 %f11, %f9, %f10;\n"
873  " cvt.rn.f32.s32 %f12, %r7;\n"
874  " cvt.rn.f32.s32 %f13, %r4;\n"
875  " div.rn.f32 %f14, %f12, %f13;\n"
/* %f16 = x + 0.5 and %f18 = y + 0.5 (0f3F000000 is 0.5). */
876  " cvt.rn.f32.s32 %f15, %r1;\n"
877  " add.f32 %f16, %f15, 0f3F000000;\n"
878  " cvt.rn.f32.s32 %f17, %r2;\n"
879  " add.f32 %f18, %f17, 0f3F000000;\n"
/* Per-axis weight w = min(max(0.5 * (scale - 1.0), 0.0), 1.0): %f22 for x, %f26 for y. */
880  " add.f32 %f19, %f11, 0fBF800000;\n"
881  " mul.f32 %f20, %f19, 0f3F000000;\n"
882  " max.f32 %f21, %f20, 0f00000000;\n"
883  " min.f32 %f22, %f21, 0f3F800000;\n"
884  " add.f32 %f23, %f14, 0fBF800000;\n"
885  " mul.f32 %f24, %f23, 0f3F000000;\n"
886  " max.f32 %f25, %f24, 0f00000000;\n"
887  " min.f32 %f26, %f25, 0f3F800000;\n"
/* Sample offsets d = w / (w + 0.5): %f28 = dx, %f30 = dy. */
888  " add.f32 %f27, %f22, 0f3F000000;\n"
889  " div.rn.f32 %f28, %f22, %f27;\n"
890  " add.f32 %f29, %f26, 0f3F000000;\n"
891  " div.rn.f32 %f30, %f26, %f29;\n"
/* %f5 = scale_x * (x + 0.5) - dx, %f4 = scale_y * (y + 0.5) - dy. */
892  " neg.f32 %f31, %f28;\n"
893  " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
894  " neg.f32 %f32, %f30;\n"
895  " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
/* Fetch 1 at (%f5, %f4); only the first component, masked to 16 bits, is used. */
896  " // begin inline asm\n"
897  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
898  " // end inline asm\n"
899  " and.b32 %r30, %r14, 65535;\n"
/* %f7 = scale_x * (x + 0.5) + dx; fetch 2 at (%f7, %f4). */
900  " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
901  " // begin inline asm\n"
902  " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
903  " // end inline asm\n"
904  " and.b32 %r31, %r18, 65535;\n"
905  " add.s32 %r32, %r30, %r31;\n"
/* %f8 = scale_y * (y + 0.5) + dy; fetch 3 at (%f5, %f8). */
906  " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
907  " // begin inline asm\n"
908  " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
909  " // end inline asm\n"
910  " and.b32 %r33, %r22, 65535;\n"
911  " add.s32 %r34, %r32, %r33;\n"
/* Fetch 4 at (%f7, %f8). */
912  " // begin inline asm\n"
913  " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
914  " // end inline asm\n"
915  " and.b32 %r35, %r26, 65535;\n"
916  " add.s32 %r36, %r34, %r35;\n"
/* Rounded average of the four samples: (sum + 2) >> 2 (the shift is at line 921). */
917  " add.s32 %r37, %r36, 2;\n"
/* Output address = param_1 + (y * param_4 + x) * 2 bytes; the index is a 32-bit product. */
918  " mad.lo.s32 %r38, %r2, %r5, %r1;\n"
919  " mul.wide.s32 %rd8, %r38, 2;\n"
920  " add.s64 %rd9, %rd1, %rd8;\n"
921  " shr.u32 %r39, %r37, 2;\n"
/* 16-bit global store whose source operand is the 32-bit register %r39. */
922  " st.global.u16 [%rd9], %r39;\n"
923  "LBB9_2:\n"
924  " ret;\n"
925  "\n"
926  "}\n"
/*
 * PTX entry Subsample_Bilinear_ushort2.
 *
 * Each thread handles one output element at
 *   x = ntid.x * ctaid.x + tid.x,  y = ntid.y * ctaid.y + tid.y.
 * It fetches four texels around a scaled sample position and, separately
 * for the first and second component, adds the low 16 bits of the four
 * fetches and computes the rounded average (sum + 2) >> 2. The two results
 * are written with a single two-element 16-bit vector store (4 bytes per
 * output element).
 *
 * Parameters, as used by the instructions below:
 *   param_0 (u64): texture operand of the tex.2d fetches.
 *   param_1 (u64): output base address (converted with cvta.to.global).
 *   param_2 (u32): x bound (threads with x >= param_2 exit) and divisor
 *                  of the x scale factor.
 *   param_3 (u32): y bound (threads with y >= param_3 exit) and divisor
 *                  of the y scale factor.
 *   param_4 (u32): output row stride in elements (index = y * param_4 + x).
 *   param_5 (u32): numerator of the x scale factor.
 *   param_6 (u32): numerator of the y scale factor.
 *   param_7 (u32): declared but never read in this entry.
 *
 * NOTE(review): param_2/param_3 look like the destination width/height and
 * param_5/param_6 like the source width/height; confirm against the CUDA
 * source this PTX was generated from.
 */
927  " // .globl Subsample_Bilinear_ushort2\n"
928  ".visible .entry Subsample_Bilinear_ushort2(\n"
929  " .param .u64 Subsample_Bilinear_ushort2_param_0,\n"
930  " .param .u64 Subsample_Bilinear_ushort2_param_1,\n"
931  " .param .u32 Subsample_Bilinear_ushort2_param_2,\n"
932  " .param .u32 Subsample_Bilinear_ushort2_param_3,\n"
933  " .param .u32 Subsample_Bilinear_ushort2_param_4,\n"
934  " .param .u32 Subsample_Bilinear_ushort2_param_5,\n"
935  " .param .u32 Subsample_Bilinear_ushort2_param_6,\n"
936  " .param .u32 Subsample_Bilinear_ushort2_param_7\n"
937  ")\n"
938  "{\n"
939  " .reg .pred %p<4>;\n"
940  " .reg .b16 %rs<3>;\n"
941  " .reg .f32 %f<33>;\n"
942  " .reg .b32 %r<49>;\n"
943  " .reg .b64 %rd<10>;\n"
944  "\n"
945  " ld.param.u32 %r4, [Subsample_Bilinear_ushort2_param_3];\n"
946  " ld.param.u32 %r3, [Subsample_Bilinear_ushort2_param_2];\n"
/* Read block index, block size and thread index for both axes. */
947  " // begin inline asm\n"
948  " mov.u32 %r8, %ctaid.x;\n"
949  " // end inline asm\n"
950  " // begin inline asm\n"
951  " mov.u32 %r9, %ctaid.y;\n"
952  " // end inline asm\n"
953  " // begin inline asm\n"
954  " mov.u32 %r10, %ntid.x;\n"
955  " // end inline asm\n"
956  " // begin inline asm\n"
957  " mov.u32 %r11, %ntid.y;\n"
958  " // end inline asm\n"
959  " // begin inline asm\n"
960  " mov.u32 %r12, %tid.x;\n"
961  " // end inline asm\n"
962  " // begin inline asm\n"
963  " mov.u32 %r13, %tid.y;\n"
964  " // end inline asm\n"
/* %r1 = x, %r2 = y; branch to the final ret when x >= param_2 or y >= param_3. */
965  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
966  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
967  " setp.ge.s32 %p1, %r2, %r4;\n"
968  " setp.ge.s32 %p2, %r1, %r3;\n"
969  " or.pred %p3, %p2, %p1;\n"
970  " @%p3 bra LBB10_2;\n"
971  " ld.param.u32 %r7, [Subsample_Bilinear_ushort2_param_6];\n"
972  " ld.param.u32 %r6, [Subsample_Bilinear_ushort2_param_5];\n"
973  " ld.param.u32 %r5, [Subsample_Bilinear_ushort2_param_4];\n"
974  " ld.param.u64 %rd4, [Subsample_Bilinear_ushort2_param_0];\n"
975  " ld.param.u64 %rd3, [Subsample_Bilinear_ushort2_param_1];\n"
976  " cvta.to.global.u64 %rd1, %rd3;\n"
/* Scale factors: %f11 = param_5 / param_2 (x), %f14 = param_6 / param_3 (y). */
977  " cvt.rn.f32.s32 %f9, %r6;\n"
978  " cvt.rn.f32.s32 %f10, %r3;\n"
979  " div.rn.f32 %f11, %f9, %f10;\n"
980  " cvt.rn.f32.s32 %f12, %r7;\n"
981  " cvt.rn.f32.s32 %f13, %r4;\n"
982  " div.rn.f32 %f14, %f12, %f13;\n"
/* %f16 = x + 0.5 and %f18 = y + 0.5 (0f3F000000 is 0.5). */
983  " cvt.rn.f32.s32 %f15, %r1;\n"
984  " add.f32 %f16, %f15, 0f3F000000;\n"
985  " cvt.rn.f32.s32 %f17, %r2;\n"
986  " add.f32 %f18, %f17, 0f3F000000;\n"
/* Per-axis weight w = min(max(0.5 * (scale - 1.0), 0.0), 1.0): %f22 for x, %f26 for y. */
987  " add.f32 %f19, %f11, 0fBF800000;\n"
988  " mul.f32 %f20, %f19, 0f3F000000;\n"
989  " max.f32 %f21, %f20, 0f00000000;\n"
990  " min.f32 %f22, %f21, 0f3F800000;\n"
991  " add.f32 %f23, %f14, 0fBF800000;\n"
992  " mul.f32 %f24, %f23, 0f3F000000;\n"
993  " max.f32 %f25, %f24, 0f00000000;\n"
994  " min.f32 %f26, %f25, 0f3F800000;\n"
/* Sample offsets d = w / (w + 0.5): %f28 = dx, %f30 = dy. */
995  " add.f32 %f27, %f22, 0f3F000000;\n"
996  " div.rn.f32 %f28, %f22, %f27;\n"
997  " add.f32 %f29, %f26, 0f3F000000;\n"
998  " div.rn.f32 %f30, %f26, %f29;\n"
/* %f5 = scale_x * (x + 0.5) - dx, %f4 = scale_y * (y + 0.5) - dy. */
999  " neg.f32 %f31, %f28;\n"
1000  " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
1001  " neg.f32 %f32, %f30;\n"
1002  " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
/* Fetch 1 at (%f5, %f4); the first two components are used, each masked to 16 bits. */
1003  " // begin inline asm\n"
1004  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
1005  " // end inline asm\n"
1006  " and.b32 %r30, %r14, 65535;\n"
/* %f7 = scale_x * (x + 0.5) + dx; fetch 2 at (%f7, %f4). */
1007  " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
1008  " // begin inline asm\n"
1009  " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
1010  " // end inline asm\n"
/* Running sums: %r32 accumulates the first component, %r35 the second. */
1011  " and.b32 %r31, %r18, 65535;\n"
1012  " add.s32 %r32, %r30, %r31;\n"
1013  " and.b32 %r33, %r15, 65535;\n"
1014  " and.b32 %r34, %r19, 65535;\n"
1015  " add.s32 %r35, %r33, %r34;\n"
/* %f8 = scale_y * (y + 0.5) + dy; fetch 3 at (%f5, %f8). */
1016  " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
1017  " // begin inline asm\n"
1018  " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
1019  " // end inline asm\n"
1020  " and.b32 %r36, %r22, 65535;\n"
1021  " add.s32 %r37, %r32, %r36;\n"
1022  " and.b32 %r38, %r23, 65535;\n"
1023  " add.s32 %r39, %r35, %r38;\n"
/* Fetch 4 at (%f7, %f8). */
1024  " // begin inline asm\n"
1025  " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
1026  " // end inline asm\n"
1027  " and.b32 %r40, %r26, 65535;\n"
1028  " add.s32 %r41, %r37, %r40;\n"
1029  " and.b32 %r42, %r27, 65535;\n"
1030  " add.s32 %r43, %r39, %r42;\n"
/* Rounded average per component: (sum + 2) >> 2. */
1031  " add.s32 %r44, %r41, 2;\n"
1032  " add.s32 %r45, %r43, 2;\n"
/* Output address = param_1 + (y * param_4 + x) * 4 bytes; the index is a 32-bit product. */
1033  " mad.lo.s32 %r46, %r2, %r5, %r1;\n"
1034  " shr.u32 %r47, %r44, 2;\n"
1035  " shr.u32 %r48, %r45, 2;\n"
1036  " cvt.u16.u32 %rs1, %r47;\n"
1037  " mul.wide.s32 %rd8, %r46, 4;\n"
1038  " add.s64 %rd9, %rd1, %rd8;\n"
1039  " cvt.u16.u32 %rs2, %r48;\n"
/* Both 16-bit results are written with one vector store. */
1040  " st.global.v2.u16 [%rd9], {%rs1, %rs2};\n"
1041  "LBB10_2:\n"
1042  " ret;\n"
1043  "\n"
1044  "}\n"
/*
 * PTX entry Subsample_Bilinear_ushort4.
 *
 * Each thread handles one output element at
 *   x = ntid.x * ctaid.x + tid.x,  y = ntid.y * ctaid.y + tid.y.
 * It fetches four texels around a scaled sample position and, separately
 * for each of the four components, adds the low 16 bits of the four
 * fetches and computes the rounded average (sum + 2) >> 2. The four
 * results are written with a single four-element 16-bit vector store
 * (8 bytes per output element).
 *
 * Parameters, as used by the instructions below:
 *   param_0 (u64): texture operand of the tex.2d fetches.
 *   param_1 (u64): output base address (converted with cvta.to.global).
 *   param_2 (u32): x bound (threads with x >= param_2 exit) and divisor
 *                  of the x scale factor.
 *   param_3 (u32): y bound (threads with y >= param_3 exit) and divisor
 *                  of the y scale factor.
 *   param_4 (u32): output row stride in elements (index = y * param_4 + x).
 *   param_5 (u32): numerator of the x scale factor.
 *   param_6 (u32): numerator of the y scale factor.
 *   param_7 (u32): declared but never read in this entry.
 *
 * NOTE(review): param_2/param_3 look like the destination width/height and
 * param_5/param_6 like the source width/height; confirm against the CUDA
 * source this PTX was generated from.
 */
1045  " // .globl Subsample_Bilinear_ushort4\n"
1046  ".visible .entry Subsample_Bilinear_ushort4(\n"
1047  " .param .u64 Subsample_Bilinear_ushort4_param_0,\n"
1048  " .param .u64 Subsample_Bilinear_ushort4_param_1,\n"
1049  " .param .u32 Subsample_Bilinear_ushort4_param_2,\n"
1050  " .param .u32 Subsample_Bilinear_ushort4_param_3,\n"
1051  " .param .u32 Subsample_Bilinear_ushort4_param_4,\n"
1052  " .param .u32 Subsample_Bilinear_ushort4_param_5,\n"
1053  " .param .u32 Subsample_Bilinear_ushort4_param_6,\n"
1054  " .param .u32 Subsample_Bilinear_ushort4_param_7\n"
1055  ")\n"
1056  "{\n"
1057  " .reg .pred %p<4>;\n"
1058  " .reg .b16 %rs<5>;\n"
1059  " .reg .f32 %f<33>;\n"
1060  " .reg .b32 %r<67>;\n"
1061  " .reg .b64 %rd<10>;\n"
1062  "\n"
1063  " ld.param.u32 %r4, [Subsample_Bilinear_ushort4_param_3];\n"
1064  " ld.param.u32 %r3, [Subsample_Bilinear_ushort4_param_2];\n"
/* Read block index, block size and thread index for both axes. */
1065  " // begin inline asm\n"
1066  " mov.u32 %r8, %ctaid.x;\n"
1067  " // end inline asm\n"
1068  " // begin inline asm\n"
1069  " mov.u32 %r9, %ctaid.y;\n"
1070  " // end inline asm\n"
1071  " // begin inline asm\n"
1072  " mov.u32 %r10, %ntid.x;\n"
1073  " // end inline asm\n"
1074  " // begin inline asm\n"
1075  " mov.u32 %r11, %ntid.y;\n"
1076  " // end inline asm\n"
1077  " // begin inline asm\n"
1078  " mov.u32 %r12, %tid.x;\n"
1079  " // end inline asm\n"
1080  " // begin inline asm\n"
1081  " mov.u32 %r13, %tid.y;\n"
1082  " // end inline asm\n"
/* %r1 = x, %r2 = y; branch to the final ret when x >= param_2 or y >= param_3. */
1083  " mad.lo.s32 %r1, %r10, %r8, %r12;\n"
1084  " mad.lo.s32 %r2, %r11, %r9, %r13;\n"
1085  " setp.ge.s32 %p1, %r2, %r4;\n"
1086  " setp.ge.s32 %p2, %r1, %r3;\n"
1087  " or.pred %p3, %p2, %p1;\n"
1088  " @%p3 bra LBB11_2;\n"
1089  " ld.param.u32 %r7, [Subsample_Bilinear_ushort4_param_6];\n"
1090  " ld.param.u32 %r6, [Subsample_Bilinear_ushort4_param_5];\n"
1091  " ld.param.u32 %r5, [Subsample_Bilinear_ushort4_param_4];\n"
1092  " ld.param.u64 %rd4, [Subsample_Bilinear_ushort4_param_0];\n"
1093  " ld.param.u64 %rd3, [Subsample_Bilinear_ushort4_param_1];\n"
1094  " cvta.to.global.u64 %rd1, %rd3;\n"
/* Scale factors: %f11 = param_5 / param_2 (x), %f14 = param_6 / param_3 (y). */
1095  " cvt.rn.f32.s32 %f9, %r6;\n"
1096  " cvt.rn.f32.s32 %f10, %r3;\n"
1097  " div.rn.f32 %f11, %f9, %f10;\n"
1098  " cvt.rn.f32.s32 %f12, %r7;\n"
1099  " cvt.rn.f32.s32 %f13, %r4;\n"
1100  " div.rn.f32 %f14, %f12, %f13;\n"
/* %f16 = x + 0.5 and %f18 = y + 0.5 (0f3F000000 is 0.5). */
1101  " cvt.rn.f32.s32 %f15, %r1;\n"
1102  " add.f32 %f16, %f15, 0f3F000000;\n"
1103  " cvt.rn.f32.s32 %f17, %r2;\n"
1104  " add.f32 %f18, %f17, 0f3F000000;\n"
/* Per-axis weight w = min(max(0.5 * (scale - 1.0), 0.0), 1.0): %f22 for x, %f26 for y. */
1105  " add.f32 %f19, %f11, 0fBF800000;\n"
1106  " mul.f32 %f20, %f19, 0f3F000000;\n"
1107  " max.f32 %f21, %f20, 0f00000000;\n"
1108  " min.f32 %f22, %f21, 0f3F800000;\n"
1109  " add.f32 %f23, %f14, 0fBF800000;\n"
1110  " mul.f32 %f24, %f23, 0f3F000000;\n"
1111  " max.f32 %f25, %f24, 0f00000000;\n"
1112  " min.f32 %f26, %f25, 0f3F800000;\n"
/* Sample offsets d = w / (w + 0.5): %f28 = dx, %f30 = dy. */
1113  " add.f32 %f27, %f22, 0f3F000000;\n"
1114  " div.rn.f32 %f28, %f22, %f27;\n"
1115  " add.f32 %f29, %f26, 0f3F000000;\n"
1116  " div.rn.f32 %f30, %f26, %f29;\n"
/* %f5 = scale_x * (x + 0.5) - dx, %f4 = scale_y * (y + 0.5) - dy. */
1117  " neg.f32 %f31, %f28;\n"
1118  " fma.rn.f32 %f5, %f11, %f16, %f31;\n"
1119  " neg.f32 %f32, %f30;\n"
1120  " fma.rn.f32 %f4, %f14, %f18, %f32;\n"
/* Fetch 1 at (%f5, %f4); all four components are used, each masked to 16 bits. */
1121  " // begin inline asm\n"
1122  " tex.2d.v4.u32.f32 {%r14, %r15, %r16, %r17}, [%rd4, {%f5, %f4}];\n"
1123  " // end inline asm\n"
1124  " and.b32 %r30, %r14, 65535;\n"
/* %f7 = scale_x * (x + 0.5) + dx; fetch 2 at (%f7, %f4). */
1125  " fma.rn.f32 %f7, %f11, %f16, %f28;\n"
1126  " // begin inline asm\n"
1127  " tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [%rd4, {%f7, %f4}];\n"
1128  " // end inline asm\n"
/* Running sums per component: %r32 (first), %r35 (second), %r38 (third), %r41 (fourth). */
1129  " and.b32 %r31, %r18, 65535;\n"
1130  " add.s32 %r32, %r30, %r31;\n"
1131  " and.b32 %r33, %r15, 65535;\n"
1132  " and.b32 %r34, %r19, 65535;\n"
1133  " add.s32 %r35, %r33, %r34;\n"
1134  " and.b32 %r36, %r16, 65535;\n"
1135  " and.b32 %r37, %r20, 65535;\n"
1136  " add.s32 %r38, %r36, %r37;\n"
1137  " and.b32 %r39, %r17, 65535;\n"
1138  " and.b32 %r40, %r21, 65535;\n"
1139  " add.s32 %r41, %r39, %r40;\n"
/* %f8 = scale_y * (y + 0.5) + dy; fetch 3 at (%f5, %f8). */
1140  " fma.rn.f32 %f8, %f14, %f18, %f30;\n"
1141  " // begin inline asm\n"
1142  " tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [%rd4, {%f5, %f8}];\n"
1143  " // end inline asm\n"
1144  " and.b32 %r42, %r22, 65535;\n"
1145  " add.s32 %r43, %r32, %r42;\n"
1146  " and.b32 %r44, %r23, 65535;\n"
1147  " add.s32 %r45, %r35, %r44;\n"
1148  " and.b32 %r46, %r24, 65535;\n"
1149  " add.s32 %r47, %r38, %r46;\n"
1150  " and.b32 %r48, %r25, 65535;\n"
1151  " add.s32 %r49, %r41, %r48;\n"
/* Fetch 4 at (%f7, %f8). */
1152  " // begin inline asm\n"
1153  " tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [%rd4, {%f7, %f8}];\n"
1154  " // end inline asm\n"
1155  " and.b32 %r50, %r26, 65535;\n"
1156  " add.s32 %r51, %r43, %r50;\n"
1157  " and.b32 %r52, %r27, 65535;\n"
1158  " add.s32 %r53, %r45, %r52;\n"
1159  " and.b32 %r54, %r28, 65535;\n"
1160  " add.s32 %r55, %r47, %r54;\n"
1161  " and.b32 %r56, %r29, 65535;\n"
1162  " add.s32 %r57, %r49, %r56;\n"
/* Rounded average per component: (sum + 2) >> 2. */
1163  " add.s32 %r58, %r51, 2;\n"
1164  " add.s32 %r59, %r53, 2;\n"
1165  " add.s32 %r60, %r55, 2;\n"
1166  " add.s32 %r61, %r57, 2;\n"
/* Output address = param_1 + (y * param_4 + x) * 8 bytes; the index is a 32-bit product. */
1167  " mad.lo.s32 %r62, %r2, %r5, %r1;\n"
1168  " shr.u32 %r63, %r58, 2;\n"
1169  " shr.u32 %r64, %r59, 2;\n"
1170  " shr.u32 %r65, %r60, 2;\n"
1171  " shr.u32 %r66, %r61, 2;\n"
1172  " cvt.u16.u32 %rs1, %r63;\n"
1173  " mul.wide.s32 %rd8, %r62, 8;\n"
1174  " add.s64 %rd9, %rd1, %rd8;\n"
1175  " cvt.u16.u32 %rs2, %r64;\n"
1176  " cvt.u16.u32 %rs3, %r65;\n"
1177  " cvt.u16.u32 %rs4, %r66;\n"
/* All four 16-bit results are written with one vector store. */
1178  " st.global.v4.u16 [%rd9], {%rs1, %rs2, %rs3, %rs4};\n"
1179  "LBB11_2:\n"
1180  " ret;\n"
1181  "\n"
1182  "}\n"
1183 ;
const char vf_scale_cuda_ptx[]