FFmpeg  4.4.4
vp9recon.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/mem_internal.h"
26 
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp9data.h"
31 #include "vp9dec.h"
32 
/*
 * check_intra_mode(): parameter list tail and body.
 *
 * NOTE(review): this listing does not contain the line that carries the
 * return type, the function name and the first parameters (td, mode, a),
 * nor several rows of mode_conv[]; the embedded numbering jumps from 31 to
 * 34, from 51 to 63 and from 63 to 65. The code below is kept exactly as it
 * appears in the listing.
 *
 * What the visible code does:
 *  - derives have_top / have_left / have_right from the block position and
 *    remaps `mode` through mode_conv[mode][have_left][have_top];
 *  - when the remapped mode needs the top edge, either points *a straight at
 *    the row above the block, or fills the buffer *a already points to,
 *    padding with the last available pixel or with a constant derived from
 *    the bit depth (bpp) when pixels are missing;
 *  - when the remapped mode needs the left edge, copies the column to the
 *    left of the block into l[] (stored in reverse row order unless
 *    invert_left is set), or fills l[] with a constant if there is no left
 *    neighbour.
 *
 * Returns the remapped prediction mode.
 */
34  uint8_t *dst_edge, ptrdiff_t stride_edge,
35  uint8_t *dst_inner, ptrdiff_t stride_inner,
36  uint8_t *l, int col, int x, int w,
37  int row, int y, enum TxfmMode tx,
38  int p, int ss_h, int ss_v, int bytesperpixel)
39 {
40  VP9Context *s = td->s;
41  int have_top = row > 0 || y > 0;
42  int have_left = col > td->tile_col_start || x > 0;
43  int have_right = x < w - 1;
44  int bpp = s->s.h.bpp;
45  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
46  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
47  { DC_127_PRED, VERT_PRED } },
48  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
49  { HOR_PRED, HOR_PRED } },
50  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
51  { LEFT_DC_PRED, DC_PRED } },
63  { HOR_UP_PRED, HOR_UP_PRED } },
65  { HOR_PRED, TM_VP8_PRED } },
66  };
/* Per-mode description of which neighbouring pixels the predictor reads. */
67  static const struct {
68  uint8_t needs_left:1;
69  uint8_t needs_top:1;
70  uint8_t needs_topleft:1;
71  uint8_t needs_topright:1;
72  uint8_t invert_left:1;
73  } edges[N_INTRA_PRED_MODES] = {
74  [VERT_PRED] = { .needs_top = 1 },
75  [HOR_PRED] = { .needs_left = 1 },
76  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
77  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
78  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
79  .needs_topleft = 1 },
80  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
81  .needs_topleft = 1 },
82  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
83  .needs_topleft = 1 },
84  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
85  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
86  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
87  .needs_topleft = 1 },
88  [LEFT_DC_PRED] = { .needs_left = 1 },
89  [TOP_DC_PRED] = { .needs_top = 1 },
90  [DC_128_PRED] = { 0 },
91  [DC_127_PRED] = { 0 },
92  [DC_129_PRED] = { 0 }
93  };
94 
/* mode_conv[] has 10 rows, so only the 10 coded modes may be passed in. */
95  av_assert2(mode >= 0 && mode < 10);
96  mode = mode_conv[mode][have_left][have_top];
/* ---- top edge ---- */
97  if (edges[mode].needs_top) {
98  uint8_t *top, *topleft;
99  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
100  int n_px_need_tr = 0;
101 
102  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
103  n_px_need_tr = 4;
104 
105  // if top of sb64-row, use s->intra_pred_data[] instead of
106  // dst[-stride] for intra prediction (it contains pre- instead of
107  // post-loopfilter data)
108  if (have_top) {
109  top = !(row & 7) && !y ?
110  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
111  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
/* topleft is only assigned when have_left is set; every later read of it is
 * guarded by have_left && have_top. */
112  if (have_left)
113  topleft = !(row & 7) && !y ?
114  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
115  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
116  &dst_inner[-stride_inner];
117  }
118 
/* Fast path: every pixel the mode needs can be read in place, so *a is
 * redirected to the source row and nothing is copied. */
119  if (have_top &&
120  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
121  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
122  n_px_need + n_px_need_tr <= n_px_have) {
123  *a = top;
124  } else {
/* Slow path: build the top row in the buffer *a points to. */
125  if (have_top) {
126  if (n_px_need <= n_px_have) {
127  memcpy(*a, top, n_px_need * bytesperpixel);
128  } else {
129 #define memset_bpp(c, i1, v, i2, num) do { \
130  if (bytesperpixel == 1) { \
131  memset(&(c)[(i1)], (v)[(i2)], (num)); \
132  } else { \
133  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
134  for (n = 0; n < (num); n++) { \
135  AV_WN16A(&(c)[((i1) + n) * 2], val); \
136  } \
137  } \
138 } while (0)
139  memcpy(*a, top, n_px_have * bytesperpixel);
140  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
141  }
142  } else {
143 #define memset_val(c, val, num) do { \
144  if (bytesperpixel == 1) { \
145  memset((c), (val), (num)); \
146  } else { \
147  int n; \
148  for (n = 0; n < (num); n++) { \
149  AV_WN16A(&(c)[n * 2], (val)); \
150  } \
151  } \
152 } while (0)
153  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
154  }
155  if (edges[mode].needs_topleft) {
156  if (have_left && have_top) {
157 #define assign_bpp(c, i1, v, i2) do { \
158  if (bytesperpixel == 1) { \
159  (c)[(i1)] = (v)[(i2)]; \
160  } else { \
161  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
162  } \
163 } while (0)
164  assign_bpp(*a, -1, topleft, -1);
165  } else {
166 #define assign_val(c, i, v) do { \
167  if (bytesperpixel == 1) { \
168  (c)[(i)] = (v); \
169  } else { \
170  AV_WN16A(&(c)[(i) * 2], (v)); \
171  } \
172 } while (0)
173  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
174  }
175  }
176  if (tx == TX_4X4 && edges[mode].needs_topright) {
177  if (have_top && have_right &&
178  n_px_need + n_px_need_tr <= n_px_have) {
179  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
180  } else {
181  memset_bpp(*a, 4, *a, 3, 4);
182  }
183  }
184  }
185  }
/* ---- left edge ---- */
186  if (edges[mode].needs_left) {
187  if (have_left) {
188  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
189  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
190  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
191 
/* invert_left: l[i] holds row i. Otherwise l[] is filled back to front, so
 * row 0 lands in l[n_px_need - 1]. Missing rows repeat the last one read. */
192  if (edges[mode].invert_left) {
193  if (n_px_need <= n_px_have) {
194  for (i = 0; i < n_px_need; i++)
195  assign_bpp(l, i, &dst[i * stride], -1);
196  } else {
197  for (i = 0; i < n_px_have; i++)
198  assign_bpp(l, i, &dst[i * stride], -1);
199  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
200  }
201  } else {
202  if (n_px_need <= n_px_have) {
203  for (i = 0; i < n_px_need; i++)
204  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
205  } else {
206  for (i = 0; i < n_px_have; i++)
207  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
208  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
209  }
210  }
211  } else {
212  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
213  }
214  }
215 
216  return mode;
217 }
218 
219 static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
220  ptrdiff_t uv_off, int bytesperpixel)
221 {
222  VP9Context *s = td->s;
223  VP9Block *b = td->b;
224  int row = td->row, col = td->col;
225  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
226  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
227  int end_x = FFMIN(2 * (s->cols - col), w4);
228  int end_y = FFMIN(2 * (s->rows - row), h4);
229  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
230  int uvstep1d = 1 << b->uvtx, p;
231  uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
232  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
233  LOCAL_ALIGNED_32(uint8_t, l, [64]);
234 
235  for (n = 0, y = 0; y < end_y; y += step1d) {
236  uint8_t *ptr = dst, *ptr_r = dst_r;
237  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
238  ptr_r += 4 * step1d * bytesperpixel, n += step) {
239  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
240  y * 2 + x : 0];
241  uint8_t *a = &a_buf[32];
242  enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
243  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
244 
245  mode = check_intra_mode(td, mode, &a, ptr_r,
246  s->s.frames[CUR_FRAME].tf.f->linesize[0],
247  ptr, td->y_stride, l,
248  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
249  s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
250  if (eob)
251  s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
252  td->block + 16 * n * bytesperpixel, eob);
253  }
254  dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
255  dst += 4 * step1d * td->y_stride;
256  }
257 
258  // U/V
259  w4 >>= s->ss_h;
260  end_x >>= s->ss_h;
261  end_y >>= s->ss_v;
262  step = 1 << (b->uvtx * 2);
263  for (p = 0; p < 2; p++) {
264  dst = td->dst[1 + p];
265  dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
266  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
267  uint8_t *ptr = dst, *ptr_r = dst_r;
268  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
269  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
270  int mode = b->uvmode;
271  uint8_t *a = &a_buf[32];
272  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
273 
274  mode = check_intra_mode(td, mode, &a, ptr_r,
275  s->s.frames[CUR_FRAME].tf.f->linesize[1],
276  ptr, td->uv_stride, l, col, x, w4, row, y,
277  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
278  s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
279  if (eob)
280  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
281  td->uvblock[p] + 16 * n * bytesperpixel, eob);
282  }
283  dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
284  dst += 4 * uvstep1d * td->uv_stride;
285  }
286  }
287 }
288 
289 void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
290 {
291  intra_recon(td, y_off, uv_off, 1);
292 }
293 
294 void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
295 {
296  intra_recon(td, y_off, uv_off, 2);
297 }
298 
300  uint8_t *dst, ptrdiff_t dst_stride,
301  const uint8_t *ref, ptrdiff_t ref_stride,
302  ThreadFrame *ref_frame,
303  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
304  int bw, int bh, int w, int h, int bytesperpixel)
305 {
306  VP9Context *s = td->s;
307  int mx = mv->x, my = mv->y, th;
308 
309  y += my >> 3;
310  x += mx >> 3;
311  ref += y * ref_stride + x * bytesperpixel;
312  mx &= 7;
313  my &= 7;
314  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
315  // we use +7 because the last 7 pixels of each sbrow can be changed in
316  // the longest loopfilter of the next sbrow
317  th = (y + bh + 4 * !!my + 7) >> 6;
318  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
319  // The arm/aarch64 _hv filters read one more row than what actually is
320  // needed, so switch to emulated edge one pixel sooner vertically
321  // (!!my * 5) than horizontally (!!mx * 4).
322  if (x < !!mx * 3 || y < !!my * 3 ||
323  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
324  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
325  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
326  160, ref_stride,
327  bw + !!mx * 7, bh + !!my * 7,
328  x - !!mx * 3, y - !!my * 3, w, h);
329  ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
330  ref_stride = 160;
331  }
332  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
333 }
334 
336  uint8_t *dst_u, uint8_t *dst_v,
337  ptrdiff_t dst_stride,
338  const uint8_t *ref_u, ptrdiff_t src_stride_u,
339  const uint8_t *ref_v, ptrdiff_t src_stride_v,
340  ThreadFrame *ref_frame,
341  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
342  int bw, int bh, int w, int h, int bytesperpixel)
343 {
344  VP9Context *s = td->s;
345  int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
346 
347  y += my >> 4;
348  x += mx >> 4;
349  ref_u += y * src_stride_u + x * bytesperpixel;
350  ref_v += y * src_stride_v + x * bytesperpixel;
351  mx &= 15;
352  my &= 15;
353  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
354  // we use +7 because the last 7 pixels of each sbrow can be changed in
355  // the longest loopfilter of the next sbrow
356  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
357  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
358  // The arm/aarch64 _hv filters read one more row than what actually is
359  // needed, so switch to emulated edge one pixel sooner vertically
360  // (!!my * 5) than horizontally (!!mx * 4).
361  if (x < !!mx * 3 || y < !!my * 3 ||
362  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
363  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
364  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
365  160, src_stride_u,
366  bw + !!mx * 7, bh + !!my * 7,
367  x - !!mx * 3, y - !!my * 3, w, h);
368  ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
369  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
370 
371  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
372  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
373  160, src_stride_v,
374  bw + !!mx * 7, bh + !!my * 7,
375  x - !!mx * 3, y - !!my * 3, w, h);
376  ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
377  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
378  } else {
379  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
380  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
381  }
382 }
383 
/*
 * Unscaled inter prediction: vp9_mc_template.c is included twice with
 * SCALED defined to 0, first with FN() appending _8bpp and BYTES_PER_PIXEL 1,
 * then with FN() appending _16bpp and BYTES_PER_PIXEL 2.
 *
 * For these instantiations mc_luma_dir/mc_chroma_dir forward to
 * mc_luma_unscaled()/mc_chroma_unscaled(). The mc, px, py, pw, ph and i
 * arguments are accepted but not forwarded; s->dsp.mc is always passed as
 * the filter table. The expansions use `s` and `bytesperpixel`, which must
 * exist at the point of use.
 *
 * NOTE(review): the template is not visible here; presumably it defines
 * FN(inter_pred), i.e. the inter_pred_8bpp()/inter_pred_16bpp() functions
 * called from inter_recon() — confirm against vp9_mc_template.c.
 */
384 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
385  px, py, pw, ph, bw, bh, w, h, i) \
386  mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
387  mv, bw, bh, w, h, bytesperpixel)
388 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
389  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
390  mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
391  row, col, mv, bw, bh, w, h, bytesperpixel)
392 #define SCALED 0
393 #define FN(x) x##_8bpp
394 #define BYTES_PER_PIXEL 1
395 #include "vp9_mc_template.c"
396 #undef FN
397 #undef BYTES_PER_PIXEL
398 #define FN(x) x##_16bpp
399 #define BYTES_PER_PIXEL 2
400 #include "vp9_mc_template.c"
/* Drop every helper macro so the scaled variants can redefine them. */
401 #undef mc_luma_dir
402 #undef mc_chroma_dir
403 #undef FN
404 #undef BYTES_PER_PIXEL
405 #undef SCALED
406 
408  vp9_mc_func (*mc)[2],
409  uint8_t *dst, ptrdiff_t dst_stride,
410  const uint8_t *ref, ptrdiff_t ref_stride,
411  ThreadFrame *ref_frame,
412  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
413  int px, int py, int pw, int ph,
414  int bw, int bh, int w, int h, int bytesperpixel,
415  const uint16_t *scale, const uint8_t *step)
416 {
417  VP9Context *s = td->s;
418  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
419  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
420  mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
421  y, x, in_mv, bw, bh, w, h, bytesperpixel);
422  } else {
423 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
424  int mx, my;
425  int refbw_m1, refbh_m1;
426  int th;
427  VP56mv mv;
428 
429  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
430  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
431  // BUG libvpx seems to scale the two components separately. This introduces
432  // rounding errors but we have to reproduce them to be exactly compatible
433  // with the output from libvpx...
434  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
435  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
436 
437  y = my >> 4;
438  x = mx >> 4;
439  ref += y * ref_stride + x * bytesperpixel;
440  mx &= 15;
441  my &= 15;
442  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
443  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
444  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
445  // we use +7 because the last 7 pixels of each sbrow can be changed in
446  // the longest loopfilter of the next sbrow
447  th = (y + refbh_m1 + 4 + 7) >> 6;
448  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
449  // The arm/aarch64 _hv filters read one more row than what actually is
450  // needed, so switch to emulated edge one pixel sooner vertically
451  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
452  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
453  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
454  ref - 3 * ref_stride - 3 * bytesperpixel,
455  288, ref_stride,
456  refbw_m1 + 8, refbh_m1 + 8,
457  x - 3, y - 3, w, h);
458  ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
459  ref_stride = 288;
460  }
461  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
462  }
463 }
464 
466  vp9_mc_func (*mc)[2],
467  uint8_t *dst_u, uint8_t *dst_v,
468  ptrdiff_t dst_stride,
469  const uint8_t *ref_u, ptrdiff_t src_stride_u,
470  const uint8_t *ref_v, ptrdiff_t src_stride_v,
471  ThreadFrame *ref_frame,
472  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
473  int px, int py, int pw, int ph,
474  int bw, int bh, int w, int h, int bytesperpixel,
475  const uint16_t *scale, const uint8_t *step)
476 {
477  VP9Context *s = td->s;
478  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
479  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
480  mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
481  ref_v, src_stride_v, ref_frame,
482  y, x, in_mv, bw, bh, w, h, bytesperpixel);
483  } else {
484  int mx, my;
485  int refbw_m1, refbh_m1;
486  int th;
487  VP56mv mv;
488 
489  if (s->ss_h) {
490  // BUG https://code.google.com/p/webm/issues/detail?id=820
491  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
492  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
493  } else {
494  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
495  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
496  }
497  if (s->ss_v) {
498  // BUG https://code.google.com/p/webm/issues/detail?id=820
499  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
500  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
501  } else {
502  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
503  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
504  }
505 #undef scale_mv
506  y = my >> 4;
507  x = mx >> 4;
508  ref_u += y * src_stride_u + x * bytesperpixel;
509  ref_v += y * src_stride_v + x * bytesperpixel;
510  mx &= 15;
511  my &= 15;
512  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
513  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
514  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
515  // we use +7 because the last 7 pixels of each sbrow can be changed in
516  // the longest loopfilter of the next sbrow
517  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
518  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
519  // The arm/aarch64 _hv filters read one more row than what actually is
520  // needed, so switch to emulated edge one pixel sooner vertically
521  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
522  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
523  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
524  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
525  288, src_stride_u,
526  refbw_m1 + 8, refbh_m1 + 8,
527  x - 3, y - 3, w, h);
528  ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
529  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
530 
531  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
532  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
533  288, src_stride_v,
534  refbw_m1 + 8, refbh_m1 + 8,
535  x - 3, y - 3, w, h);
536  ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
537  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
538  } else {
539  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
540  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
541  }
542  }
543 }
544 
/*
 * Scaled inter prediction: vp9_mc_template.c is included twice more with
 * SCALED defined to 1, with FN() appending _scaled_8bpp (BYTES_PER_PIXEL 1)
 * and then _scaled_16bpp (BYTES_PER_PIXEL 2).
 *
 * Here mc_luma_dir/mc_chroma_dir forward to mc_luma_scaled()/
 * mc_chroma_scaled(). The scaled filter is selected by token-pasting `s`
 * onto the mc argument (s->dsp.s##mc), s->dsp.mc is passed as the unscaled
 * fallback table, and the scale/step tables are looked up with b->ref[i].
 * The expansions use `s`, `b` and `bytesperpixel`, which must exist at the
 * point of use.
 *
 * NOTE(review): the template is not visible here; presumably it defines
 * FN(inter_pred), i.e. the inter_pred_scaled_8bpp()/inter_pred_scaled_16bpp()
 * functions called from inter_recon() — confirm against vp9_mc_template.c.
 */
545 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
546  px, py, pw, ph, bw, bh, w, h, i) \
547  mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
548  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
549  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
550 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
551  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
552  mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
553  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
554  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
555 #define SCALED 1
556 #define FN(x) x##_scaled_8bpp
557 #define BYTES_PER_PIXEL 1
558 #include "vp9_mc_template.c"
559 #undef FN
560 #undef BYTES_PER_PIXEL
561 #define FN(x) x##_scaled_16bpp
562 #define BYTES_PER_PIXEL 2
563 #include "vp9_mc_template.c"
/* Clean up the helper macros after the last instantiation. */
564 #undef mc_luma_dir
565 #undef mc_chroma_dir
566 #undef FN
567 #undef BYTES_PER_PIXEL
568 #undef SCALED
569 
570 static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
571 {
572  VP9Context *s = td->s;
573  VP9Block *b = td->b;
574  int row = td->row, col = td->col;
575 
576  if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
577  (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
578  if (!s->td->error_info) {
579  s->td->error_info = AVERROR_INVALIDDATA;
580  av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
581  "reference frame has invalid dimensions\n");
582  }
583  return;
584  }
585 
586  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
587  if (bytesperpixel == 1) {
588  inter_pred_scaled_8bpp(td);
589  } else {
590  inter_pred_scaled_16bpp(td);
591  }
592  } else {
593  if (bytesperpixel == 1) {
594  inter_pred_8bpp(td);
595  } else {
596  inter_pred_16bpp(td);
597  }
598  }
599 
600  if (!b->skip) {
601  /* mostly copied intra_recon() */
602 
603  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
604  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
605  int end_x = FFMIN(2 * (s->cols - col), w4);
606  int end_y = FFMIN(2 * (s->rows - row), h4);
607  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
608  int uvstep1d = 1 << b->uvtx, p;
609  uint8_t *dst = td->dst[0];
610 
611  // y itxfm add
612  for (n = 0, y = 0; y < end_y; y += step1d) {
613  uint8_t *ptr = dst;
614  for (x = 0; x < end_x; x += step1d,
615  ptr += 4 * step1d * bytesperpixel, n += step) {
616  int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
617 
618  if (eob)
619  s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
620  td->block + 16 * n * bytesperpixel, eob);
621  }
622  dst += 4 * td->y_stride * step1d;
623  }
624 
625  // uv itxfm add
626  end_x >>= s->ss_h;
627  end_y >>= s->ss_v;
628  step = 1 << (b->uvtx * 2);
629  for (p = 0; p < 2; p++) {
630  dst = td->dst[p + 1];
631  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
632  uint8_t *ptr = dst;
633  for (x = 0; x < end_x; x += uvstep1d,
634  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
635  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
636 
637  if (eob)
638  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
639  td->uvblock[p] + 16 * n * bytesperpixel, eob);
640  }
641  dst += 4 * uvstep1d * td->uv_stride;
642  }
643  }
644  }
645 }
646 
648 {
649  inter_recon(td, 1);
650 }
651 
653 {
654  inter_recon(td, 2);
655 }
#define av_always_inline
Definition: attributes.h:45
uint8_t
simple assert() macros that are a bit more flexible than ISO C assert().
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
Libavcodec external API header.
#define s(width, name)
Definition: cbs_vp9.c:257
#define FFMIN(a, b)
Definition: common.h:105
#define av_clip
Definition: common.h:122
#define FFMAX(a, b)
Definition: common.h:103
#define NULL
Definition: coverity.c:32
mode
Use these values in ebur128_init (or'ed).
Definition: ebur128.h:83
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
int i
Definition: input.c:407
#define AV_RN16A(p)
Definition: intreadwrite.h:522
static const int8_t mv[256][2]
Definition: 4xm.c:78
common internal API header
uint8_t w
Definition: llviddspenc.c:39
int stride
Definition: mace.c:144
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
void ff_thread_await_progress(ThreadFrame *f, int n, int field)
Wait for earlier decoding threads to finish reference pictures.
#define th
Definition: regdef.h:75
#define td
Definition: regdef.h:70
int width
Definition: frame.h:376
int height
Definition: frame.h:376
AVFrame * f
Definition: thread.h:35
Definition: vp56.h:68
int16_t y
Definition: vp56.h:70
int16_t x
Definition: vp56.h:69
#define av_log(a,...)
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
#define mc
const char * b
Definition: vf_curves.c:118
Core video DSP helper functions.
@ VERT_RIGHT_PRED
Definition: vp9.h:51
@ TOP_DC_PRED
Definition: vp9.h:57
@ VERT_LEFT_PRED
Definition: vp9.h:53
@ HOR_UP_PRED
Definition: vp9.h:54
@ DC_127_PRED
Definition: vp9.h:59
@ TM_VP8_PRED
Definition: vp9.h:55
@ LEFT_DC_PRED
Definition: vp9.h:56
@ DIAG_DOWN_LEFT_PRED
Definition: vp9.h:49
@ DIAG_DOWN_RIGHT_PRED
Definition: vp9.h:50
@ DC_128_PRED
Definition: vp9.h:58
@ VERT_PRED
Definition: vp9.h:46
@ N_INTRA_PRED_MODES
Definition: vp9.h:61
@ HOR_DOWN_PRED
Definition: vp9.h:52
@ HOR_PRED
Definition: vp9.h:47
@ DC_129_PRED
Definition: vp9.h:60
@ DC_PRED
Definition: vp9.h:48
TxfmType
Definition: vp9.h:37
@ DCT_DCT
Definition: vp9.h:38
TxfmMode
Definition: vp9.h:27
@ TX_4X4
Definition: vp9.h:28
@ TX_8X8
Definition: vp9.h:29
enum TxfmType ff_vp9_intra_txfm_type[14]
Definition: vp9data.c:437
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
#define REF_INVALID_SCALE
Definition: vp9dec.h:40
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:35
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:32
static av_always_inline void mc_chroma_unscaled(VP9TileData *td, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:335
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:294
#define assign_val(c, i, v)
#define assign_bpp(c, i1, v, i2)
#define memset_bpp(c, i1, v, i2, num)
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:652
#define scale_mv(n, dim)
static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a, uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *l, int col, int x, int w, int row, int y, enum TxfmMode tx, int p, int ss_h, int ss_v, int bytesperpixel)
Definition: vp9recon.c:33
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:647
static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off, int bytesperpixel)
Definition: vp9recon.c:219
static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
Definition: vp9recon.c:570
static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:465
static av_always_inline void mc_luma_unscaled(VP9TileData *td, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:299
static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:407
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:289
#define memset_val(c, val, num)
#define CUR_FRAME
Definition: vp9shared.h:163
@ BS_8x8
Definition: vp9shared.h:87