3 "// Generated by LLVM NVPTX Back-End\n"
10 " // .globl Thumbnail_uchar\n"
12 ".visible .entry Thumbnail_uchar(\n"
13 " .param .u64 Thumbnail_uchar_param_0,\n"
14 " .param .u64 Thumbnail_uchar_param_1,\n"
15 " .param .u32 Thumbnail_uchar_param_2,\n"
16 " .param .u32 Thumbnail_uchar_param_3\n"
19 " .reg .pred %p<4>;\n"
21 " .reg .b32 %r<17>;\n"
22 " .reg .b64 %rd<7>;\n"
24 " ld.param.u32 %r9, [Thumbnail_uchar_param_2];\n"
25 " // begin inline asm\n"
26 " mov.u32 %r3, %ctaid.x;\n"
27 " // end inline asm\n"
28 " ld.param.u32 %r10, [Thumbnail_uchar_param_3];\n"
29 " // begin inline asm\n"
30 " mov.u32 %r4, %ctaid.y;\n"
31 " // end inline asm\n"
32 " // begin inline asm\n"
33 " mov.u32 %r5, %ntid.x;\n"
34 " // end inline asm\n"
35 " // begin inline asm\n"
36 " mov.u32 %r6, %ntid.y;\n"
37 " // end inline asm\n"
38 " // begin inline asm\n"
39 " mov.u32 %r7, %tid.x;\n"
40 " // end inline asm\n"
41 " // begin inline asm\n"
42 " mov.u32 %r8, %tid.y;\n"
43 " // end inline asm\n"
44 " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
45 " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
46 " setp.ge.s32 %p1, %r2, %r10;\n"
47 " setp.ge.s32 %p2, %r1, %r9;\n"
48 " or.pred %p3, %p2, %p1;\n"
50 " ld.param.u64 %rd4, [Thumbnail_uchar_param_0];\n"
51 " ld.param.u64 %rd3, [Thumbnail_uchar_param_1];\n"
52 " cvta.to.global.u64 %rd1, %rd3;\n"
53 " cvt.rn.f32.s32 %f1, %r1;\n"
54 " cvt.rn.f32.s32 %f2, %r2;\n"
55 " // begin inline asm\n"
56 " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
57 " // end inline asm\n"
58 " and.b32 %r15, %r11, 255;\n"
59 " mul.wide.u32 %rd5, %r15, 4;\n"
60 " add.s64 %rd6, %rd1, %rd5;\n"
61 " atom.global.add.u32 %r16, [%rd6], 1;\n"
66 " // .globl Thumbnail_uchar2\n"
67 ".visible .entry Thumbnail_uchar2(\n"
68 " .param .u64 Thumbnail_uchar2_param_0,\n"
69 " .param .u64 Thumbnail_uchar2_param_1,\n"
70 " .param .u32 Thumbnail_uchar2_param_2,\n"
71 " .param .u32 Thumbnail_uchar2_param_3\n"
74 " .reg .pred %p<4>;\n"
76 " .reg .b32 %r<19>;\n"
77 " .reg .b64 %rd<10>;\n"
79 " ld.param.u32 %r9, [Thumbnail_uchar2_param_2];\n"
80 " // begin inline asm\n"
81 " mov.u32 %r3, %ctaid.x;\n"
82 " // end inline asm\n"
83 " ld.param.u32 %r10, [Thumbnail_uchar2_param_3];\n"
84 " // begin inline asm\n"
85 " mov.u32 %r4, %ctaid.y;\n"
86 " // end inline asm\n"
87 " // begin inline asm\n"
88 " mov.u32 %r5, %ntid.x;\n"
89 " // end inline asm\n"
90 " // begin inline asm\n"
91 " mov.u32 %r6, %ntid.y;\n"
92 " // end inline asm\n"
93 " // begin inline asm\n"
94 " mov.u32 %r7, %tid.x;\n"
95 " // end inline asm\n"
96 " // begin inline asm\n"
97 " mov.u32 %r8, %tid.y;\n"
98 " // end inline asm\n"
99 " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
100 " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
101 " setp.ge.s32 %p1, %r2, %r10;\n"
102 " setp.ge.s32 %p2, %r1, %r9;\n"
103 " or.pred %p3, %p2, %p1;\n"
104 " @%p3 bra LBB1_2;\n"
105 " ld.param.u64 %rd4, [Thumbnail_uchar2_param_0];\n"
106 " ld.param.u64 %rd3, [Thumbnail_uchar2_param_1];\n"
107 " cvta.to.global.u64 %rd1, %rd3;\n"
108 " cvt.rn.f32.s32 %f1, %r1;\n"
109 " cvt.rn.f32.s32 %f2, %r2;\n"
110 " // begin inline asm\n"
111 " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
112 " // end inline asm\n"
113 " and.b32 %r15, %r11, 255;\n"
114 " mul.wide.u32 %rd5, %r15, 4;\n"
115 " add.s64 %rd6, %rd1, %rd5;\n"
116 " atom.global.add.u32 %r16, [%rd6], 1;\n"
117 " and.b32 %r17, %r12, 255;\n"
118 " mul.wide.u32 %rd7, %r17, 4;\n"
119 " add.s64 %rd8, %rd1, %rd7;\n"
120 " add.s64 %rd9, %rd8, 1024;\n"
121 " atom.global.add.u32 %r18, [%rd9], 1;\n"
126 " // .globl Thumbnail_ushort\n"
127 ".visible .entry Thumbnail_ushort(\n"
128 " .param .u64 Thumbnail_ushort_param_0,\n"
129 " .param .u64 Thumbnail_ushort_param_1,\n"
130 " .param .u32 Thumbnail_ushort_param_2,\n"
131 " .param .u32 Thumbnail_ushort_param_3\n"
134 " .reg .pred %p<4>;\n"
135 " .reg .f32 %f<3>;\n"
136 " .reg .b32 %r<19>;\n"
137 " .reg .b64 %rd<7>;\n"
139 " ld.param.u32 %r9, [Thumbnail_ushort_param_2];\n"
140 " // begin inline asm\n"
141 " mov.u32 %r3, %ctaid.x;\n"
142 " // end inline asm\n"
143 " ld.param.u32 %r10, [Thumbnail_ushort_param_3];\n"
144 " // begin inline asm\n"
145 " mov.u32 %r4, %ctaid.y;\n"
146 " // end inline asm\n"
147 " // begin inline asm\n"
148 " mov.u32 %r5, %ntid.x;\n"
149 " // end inline asm\n"
150 " // begin inline asm\n"
151 " mov.u32 %r6, %ntid.y;\n"
152 " // end inline asm\n"
153 " // begin inline asm\n"
154 " mov.u32 %r7, %tid.x;\n"
155 " // end inline asm\n"
156 " // begin inline asm\n"
157 " mov.u32 %r8, %tid.y;\n"
158 " // end inline asm\n"
159 " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
160 " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
161 " setp.ge.s32 %p1, %r2, %r10;\n"
162 " setp.ge.s32 %p2, %r1, %r9;\n"
163 " or.pred %p3, %p2, %p1;\n"
164 " @%p3 bra LBB2_2;\n"
165 " ld.param.u64 %rd4, [Thumbnail_ushort_param_0];\n"
166 " ld.param.u64 %rd3, [Thumbnail_ushort_param_1];\n"
167 " cvta.to.global.u64 %rd1, %rd3;\n"
168 " cvt.rn.f32.s32 %f1, %r1;\n"
169 " cvt.rn.f32.s32 %f2, %r2;\n"
170 " // begin inline asm\n"
171 " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
172 " // end inline asm\n"
173 " and.b32 %r15, %r11, 65535;\n"
174 " add.s32 %r16, %r15, 128;\n"
175 " shr.u32 %r17, %r16, 8;\n"
176 " mul.wide.u32 %rd5, %r17, 4;\n"
177 " add.s64 %rd6, %rd1, %rd5;\n"
178 " atom.global.add.u32 %r18, [%rd6], 1;\n"
183 " // .globl Thumbnail_ushort2\n"
184 ".visible .entry Thumbnail_ushort2(\n"
185 " .param .u64 Thumbnail_ushort2_param_0,\n"
186 " .param .u64 Thumbnail_ushort2_param_1,\n"
187 " .param .u32 Thumbnail_ushort2_param_2,\n"
188 " .param .u32 Thumbnail_ushort2_param_3\n"
191 " .reg .pred %p<4>;\n"
192 " .reg .f32 %f<3>;\n"
193 " .reg .b32 %r<23>;\n"
194 " .reg .b64 %rd<10>;\n"
196 " ld.param.u32 %r9, [Thumbnail_ushort2_param_2];\n"
197 " // begin inline asm\n"
198 " mov.u32 %r3, %ctaid.x;\n"
199 " // end inline asm\n"
200 " ld.param.u32 %r10, [Thumbnail_ushort2_param_3];\n"
201 " // begin inline asm\n"
202 " mov.u32 %r4, %ctaid.y;\n"
203 " // end inline asm\n"
204 " // begin inline asm\n"
205 " mov.u32 %r5, %ntid.x;\n"
206 " // end inline asm\n"
207 " // begin inline asm\n"
208 " mov.u32 %r6, %ntid.y;\n"
209 " // end inline asm\n"
210 " // begin inline asm\n"
211 " mov.u32 %r7, %tid.x;\n"
212 " // end inline asm\n"
213 " // begin inline asm\n"
214 " mov.u32 %r8, %tid.y;\n"
215 " // end inline asm\n"
216 " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
217 " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
218 " setp.ge.s32 %p1, %r2, %r10;\n"
219 " setp.ge.s32 %p2, %r1, %r9;\n"
220 " or.pred %p3, %p2, %p1;\n"
221 " @%p3 bra LBB3_2;\n"
222 " ld.param.u64 %rd4, [Thumbnail_ushort2_param_0];\n"
223 " ld.param.u64 %rd3, [Thumbnail_ushort2_param_1];\n"
224 " cvta.to.global.u64 %rd1, %rd3;\n"
225 " cvt.rn.f32.s32 %f1, %r1;\n"
226 " cvt.rn.f32.s32 %f2, %r2;\n"
227 " // begin inline asm\n"
228 " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
229 " // end inline asm\n"
230 " and.b32 %r15, %r11, 65535;\n"
231 " add.s32 %r16, %r15, 128;\n"
232 " shr.u32 %r17, %r16, 8;\n"
233 " mul.wide.u32 %rd5, %r17, 4;\n"
234 " add.s64 %rd6, %rd1, %rd5;\n"
235 " atom.global.add.u32 %r18, [%rd6], 1;\n"
236 " and.b32 %r19, %r12, 65535;\n"
237 " add.s32 %r20, %r19, 128;\n"
238 " shr.u32 %r21, %r20, 8;\n"
239 " mul.wide.u32 %rd7, %r21, 4;\n"
240 " add.s64 %rd8, %rd1, %rd7;\n"
241 " add.s64 %rd9, %rd8, 1024;\n"
242 " atom.global.add.u32 %r22, [%rd9], 1;\n"
const char vf_thumbnail_cuda_ptx[]