aboutsummaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorHans Verkuil <hans.verkuil@cisco.com>2018-08-30 11:16:06 +0200
committerHans Verkuil <hans.verkuil@cisco.com>2018-08-31 14:37:10 +0200
commit4cebd11836135287340f12a4e3df92a881157a5c (patch)
tree8b2b16733c117e1622187a26c3f91ab0f9f64886 /utils
parentfcccd99ebdc6ff01296f57948c52dacc9c1d2695 (diff)
v4l-utils: sync with media master
This adds the FWHT codec support from the vicodec kernel driver. Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Diffstat (limited to 'utils')
-rw-r--r--utils/common/codec-fwht.c855
-rw-r--r--utils/common/codec-fwht.h137
-rw-r--r--utils/common/codec-fwht.patch27
-rw-r--r--utils/common/codec-v4l2-fwht.c325
-rw-r--r--utils/common/codec-v4l2-fwht.h50
5 files changed, 1394 insertions, 0 deletions
diff --git a/utils/common/codec-fwht.c b/utils/common/codec-fwht.c
new file mode 100644
index 00000000..47939160
--- /dev/null
+++ b/utils/common/codec-fwht.c
@@ -0,0 +1,855 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2016 Tom aan de Wiel
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
+ *
+ * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
+ * R.D. Brown, 1977
+ */
+
+#include <linux/string.h>
+#include "codec-fwht.h"
+
+/*
+ * Note: bit 0 of the header must always be 0. Otherwise it cannot
+ * be guaranteed that the magic 8 byte sequence (see below) can
+ * never occur in the rlc output.
+ */
+#define PFRAME_BIT BIT(15)
+#define DUPS_MASK 0x1ffe
+
+#define PBLOCK 0
+#define IBLOCK 1
+
+#define ALL_ZEROS 15
+
+static const uint8_t zigzag[64] = {
+ 0,
+ 1, 8,
+ 2, 9, 16,
+ 3, 10, 17, 24,
+ 4, 11, 18, 25, 32,
+ 5, 12, 19, 26, 33, 40,
+ 6, 13, 20, 27, 34, 41, 48,
+ 7, 14, 21, 28, 35, 42, 49, 56,
+ 15, 22, 29, 36, 43, 50, 57,
+ 23, 30, 37, 44, 51, 58,
+ 31, 38, 45, 52, 59,
+ 39, 46, 53, 60,
+ 47, 54, 61,
+ 55, 62,
+ 63,
+};
+
+
+static int rlc(const s16 *in, __be16 *output, int blocktype)
+{
+ s16 block[8 * 8];
+ s16 *wp = block;
+ int i = 0;
+ int x, y;
+ int ret = 0;
+
+ /* read in block from framebuffer */
+ int lastzero_run = 0;
+ int to_encode;
+
+ for (y = 0; y < 8; y++) {
+ for (x = 0; x < 8; x++) {
+ *wp = in[x + y * 8];
+ wp++;
+ }
+ }
+
+ /* keep track of amount of trailing zeros */
+ for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
+ lastzero_run++;
+
+ *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
+ ret++;
+
+ to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
+
+ i = 0;
+ while (i < to_encode) {
+ int cnt = 0;
+ int tmp;
+
+ /* count leading zeros */
+ while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
+ cnt++;
+ i++;
+ if (i == to_encode) {
+ cnt--;
+ break;
+ }
+ }
+ /* 4 bits for run, 12 for coefficient (quantization by 4) */
+ *output++ = htons((cnt | tmp << 4));
+ i++;
+ ret++;
+ }
+ if (lastzero_run > 14) {
+ *output = htons(ALL_ZEROS | 0);
+ ret++;
+ }
+
+ return ret;
+}
+
+/*
+ * This function will worst-case increase rlc_in by 65*2 bytes:
+ * one s16 value for the header and 8 * 8 coefficients of type s16.
+ */
+static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
+{
+ /* header */
+ const __be16 *input = *rlc_in;
+ s16 ret = ntohs(*input++);
+ int dec_count = 0;
+ s16 block[8 * 8 + 16];
+ s16 *wp = block;
+ int i;
+
+ /*
+ * Now de-compress, it expands one byte to up to 15 bytes
+ * (or fills the remainder of the 64 bytes with zeroes if it
+ * is the last byte to expand).
+ *
+ * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
+ * allow for overflow if the incoming data was malformed.
+ */
+ while (dec_count < 8 * 8) {
+ s16 in = ntohs(*input++);
+ int length = in & 0xf;
+ int coeff = in >> 4;
+
+ /* fill remainder with zeros */
+ if (length == 15) {
+ for (i = 0; i < 64 - dec_count; i++)
+ *wp++ = 0;
+ break;
+ }
+
+ for (i = 0; i < length; i++)
+ *wp++ = 0;
+ *wp++ = coeff;
+ dec_count += length + 1;
+ }
+
+ wp = block;
+
+ for (i = 0; i < 64; i++) {
+ int pos = zigzag[i];
+ int y = pos / 8;
+ int x = pos % 8;
+
+ dwht_out[x + y * 8] = *wp++;
+ }
+ *rlc_in = input;
+ return ret;
+}
+
+static const int quant_table[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3,
+ 2, 2, 2, 2, 2, 2, 3, 6,
+ 2, 2, 2, 2, 2, 3, 6, 6,
+ 2, 2, 2, 2, 3, 6, 6, 6,
+ 2, 2, 2, 3, 6, 6, 6, 6,
+ 2, 2, 3, 6, 6, 6, 6, 8,
+};
+
+static const int quant_table_p[] = {
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 6,
+ 3, 3, 3, 3, 3, 3, 6, 6,
+ 3, 3, 3, 3, 3, 6, 6, 9,
+ 3, 3, 3, 3, 6, 6, 9, 9,
+ 3, 3, 3, 6, 6, 9, 9, 10,
+};
+
+static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
+{
+ const int *quant = quant_table;
+ int i, j;
+
+ for (j = 0; j < 8; j++) {
+ for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
+ *coeff >>= *quant;
+ if (*coeff >= -qp && *coeff <= qp)
+ *coeff = *de_coeff = 0;
+ else
+ *de_coeff = *coeff << *quant;
+ }
+ }
+}
+
+static void dequantize_intra(s16 *coeff)
+{
+ const int *quant = quant_table;
+ int i, j;
+
+ for (j = 0; j < 8; j++)
+ for (i = 0; i < 8; i++, quant++, coeff++)
+ *coeff <<= *quant;
+}
+
+static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
+{
+ const int *quant = quant_table_p;
+ int i, j;
+
+ for (j = 0; j < 8; j++) {
+ for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
+ *coeff >>= *quant;
+ if (*coeff >= -qp && *coeff <= qp)
+ *coeff = *de_coeff = 0;
+ else
+ *de_coeff = *coeff << *quant;
+ }
+ }
+}
+
+static void dequantize_inter(s16 *coeff)
+{
+ const int *quant = quant_table_p;
+ int i, j;
+
+ for (j = 0; j < 8; j++)
+ for (i = 0; i < 8; i++, quant++, coeff++)
+ *coeff <<= *quant;
+}
+
+static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
+ unsigned int input_step, bool intra)
+{
+ /* we'll need more than 8 bits for the transformed coefficients */
+ s32 workspace1[8], workspace2[8];
+ const u8 *tmp = block;
+ s16 *out = output_block;
+ int add = intra ? 256 : 0;
+ unsigned int i;
+
+ /* stage 1 */
+ stride *= input_step;
+
+ for (i = 0; i < 8; i++, tmp += stride, out += 8) {
+ switch (input_step) {
+ case 1:
+ workspace1[0] = tmp[0] + tmp[1] - add;
+ workspace1[1] = tmp[0] - tmp[1];
+
+ workspace1[2] = tmp[2] + tmp[3] - add;
+ workspace1[3] = tmp[2] - tmp[3];
+
+ workspace1[4] = tmp[4] + tmp[5] - add;
+ workspace1[5] = tmp[4] - tmp[5];
+
+ workspace1[6] = tmp[6] + tmp[7] - add;
+ workspace1[7] = tmp[6] - tmp[7];
+ break;
+ case 2:
+ workspace1[0] = tmp[0] + tmp[2] - add;
+ workspace1[1] = tmp[0] - tmp[2];
+
+ workspace1[2] = tmp[4] + tmp[6] - add;
+ workspace1[3] = tmp[4] - tmp[6];
+
+ workspace1[4] = tmp[8] + tmp[10] - add;
+ workspace1[5] = tmp[8] - tmp[10];
+
+ workspace1[6] = tmp[12] + tmp[14] - add;
+ workspace1[7] = tmp[12] - tmp[14];
+ break;
+ case 3:
+ workspace1[0] = tmp[0] + tmp[3] - add;
+ workspace1[1] = tmp[0] - tmp[3];
+
+ workspace1[2] = tmp[6] + tmp[9] - add;
+ workspace1[3] = tmp[6] - tmp[9];
+
+ workspace1[4] = tmp[12] + tmp[15] - add;
+ workspace1[5] = tmp[12] - tmp[15];
+
+ workspace1[6] = tmp[18] + tmp[21] - add;
+ workspace1[7] = tmp[18] - tmp[21];
+ break;
+ default:
+ workspace1[0] = tmp[0] + tmp[4] - add;
+ workspace1[1] = tmp[0] - tmp[4];
+
+ workspace1[2] = tmp[8] + tmp[12] - add;
+ workspace1[3] = tmp[8] - tmp[12];
+
+ workspace1[4] = tmp[16] + tmp[20] - add;
+ workspace1[5] = tmp[16] - tmp[20];
+
+ workspace1[6] = tmp[24] + tmp[28] - add;
+ workspace1[7] = tmp[24] - tmp[28];
+ break;
+ }
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0] = workspace2[0] + workspace2[4];
+ out[1] = workspace2[0] - workspace2[4];
+ out[2] = workspace2[1] - workspace2[5];
+ out[3] = workspace2[1] + workspace2[5];
+ out[4] = workspace2[2] + workspace2[6];
+ out[5] = workspace2[2] - workspace2[6];
+ out[6] = workspace2[3] - workspace2[7];
+ out[7] = workspace2[3] + workspace2[7];
+ }
+
+ out = output_block;
+
+ for (i = 0; i < 8; i++, out++) {
+ /* stage 1 */
+ workspace1[0] = out[0] + out[1 * 8];
+ workspace1[1] = out[0] - out[1 * 8];
+
+ workspace1[2] = out[2 * 8] + out[3 * 8];
+ workspace1[3] = out[2 * 8] - out[3 * 8];
+
+ workspace1[4] = out[4 * 8] + out[5 * 8];
+ workspace1[5] = out[4 * 8] - out[5 * 8];
+
+ workspace1[6] = out[6 * 8] + out[7 * 8];
+ workspace1[7] = out[6 * 8] - out[7 * 8];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+ /* stage 3 */
+ out[0 * 8] = workspace2[0] + workspace2[4];
+ out[1 * 8] = workspace2[0] - workspace2[4];
+ out[2 * 8] = workspace2[1] - workspace2[5];
+ out[3 * 8] = workspace2[1] + workspace2[5];
+ out[4 * 8] = workspace2[2] + workspace2[6];
+ out[5 * 8] = workspace2[2] - workspace2[6];
+ out[6 * 8] = workspace2[3] - workspace2[7];
+ out[7 * 8] = workspace2[3] + workspace2[7];
+ }
+}
+
+/*
+ * Not the nicest way of doing it, but P-blocks get twice the range of
+ * that of the I-blocks. Therefore we need a type bigger than 8 bits.
+ * Furthermore values can be negative... This is just a version that
+ * works with 16 signed data
+ */
+static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
+{
+ /* we'll need more than 8 bits for the transformed coefficients */
+ s32 workspace1[8], workspace2[8];
+ const s16 *tmp = block;
+ s16 *out = output_block;
+ int i;
+
+ for (i = 0; i < 8; i++, tmp += stride, out += 8) {
+ /* stage 1 */
+ workspace1[0] = tmp[0] + tmp[1];
+ workspace1[1] = tmp[0] - tmp[1];
+
+ workspace1[2] = tmp[2] + tmp[3];
+ workspace1[3] = tmp[2] - tmp[3];
+
+ workspace1[4] = tmp[4] + tmp[5];
+ workspace1[5] = tmp[4] - tmp[5];
+
+ workspace1[6] = tmp[6] + tmp[7];
+ workspace1[7] = tmp[6] - tmp[7];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0] = workspace2[0] + workspace2[4];
+ out[1] = workspace2[0] - workspace2[4];
+ out[2] = workspace2[1] - workspace2[5];
+ out[3] = workspace2[1] + workspace2[5];
+ out[4] = workspace2[2] + workspace2[6];
+ out[5] = workspace2[2] - workspace2[6];
+ out[6] = workspace2[3] - workspace2[7];
+ out[7] = workspace2[3] + workspace2[7];
+ }
+
+ out = output_block;
+
+ for (i = 0; i < 8; i++, out++) {
+ /* stage 1 */
+ workspace1[0] = out[0] + out[1*8];
+ workspace1[1] = out[0] - out[1*8];
+
+ workspace1[2] = out[2*8] + out[3*8];
+ workspace1[3] = out[2*8] - out[3*8];
+
+ workspace1[4] = out[4*8] + out[5*8];
+ workspace1[5] = out[4*8] - out[5*8];
+
+ workspace1[6] = out[6*8] + out[7*8];
+ workspace1[7] = out[6*8] - out[7*8];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0*8] = workspace2[0] + workspace2[4];
+ out[1*8] = workspace2[0] - workspace2[4];
+ out[2*8] = workspace2[1] - workspace2[5];
+ out[3*8] = workspace2[1] + workspace2[5];
+ out[4*8] = workspace2[2] + workspace2[6];
+ out[5*8] = workspace2[2] - workspace2[6];
+ out[6*8] = workspace2[3] - workspace2[7];
+ out[7*8] = workspace2[3] + workspace2[7];
+ }
+}
+
+static void ifwht(const s16 *block, s16 *output_block, int intra)
+{
+ /*
+ * we'll need more than 8 bits for the transformed coefficients
+ * use native unit of cpu
+ */
+ int workspace1[8], workspace2[8];
+ int inter = intra ? 0 : 1;
+ const s16 *tmp = block;
+ s16 *out = output_block;
+ int i;
+
+ for (i = 0; i < 8; i++, tmp += 8, out += 8) {
+ /* stage 1 */
+ workspace1[0] = tmp[0] + tmp[1];
+ workspace1[1] = tmp[0] - tmp[1];
+
+ workspace1[2] = tmp[2] + tmp[3];
+ workspace1[3] = tmp[2] - tmp[3];
+
+ workspace1[4] = tmp[4] + tmp[5];
+ workspace1[5] = tmp[4] - tmp[5];
+
+ workspace1[6] = tmp[6] + tmp[7];
+ workspace1[7] = tmp[6] - tmp[7];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0] = workspace2[0] + workspace2[4];
+ out[1] = workspace2[0] - workspace2[4];
+ out[2] = workspace2[1] - workspace2[5];
+ out[3] = workspace2[1] + workspace2[5];
+ out[4] = workspace2[2] + workspace2[6];
+ out[5] = workspace2[2] - workspace2[6];
+ out[6] = workspace2[3] - workspace2[7];
+ out[7] = workspace2[3] + workspace2[7];
+ }
+
+ out = output_block;
+
+ for (i = 0; i < 8; i++, out++) {
+ /* stage 1 */
+ workspace1[0] = out[0] + out[1 * 8];
+ workspace1[1] = out[0] - out[1 * 8];
+
+ workspace1[2] = out[2 * 8] + out[3 * 8];
+ workspace1[3] = out[2 * 8] - out[3 * 8];
+
+ workspace1[4] = out[4 * 8] + out[5 * 8];
+ workspace1[5] = out[4 * 8] - out[5 * 8];
+
+ workspace1[6] = out[6 * 8] + out[7 * 8];
+ workspace1[7] = out[6 * 8] - out[7 * 8];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ if (inter) {
+ int d;
+
+ out[0 * 8] = workspace2[0] + workspace2[4];
+ out[1 * 8] = workspace2[0] - workspace2[4];
+ out[2 * 8] = workspace2[1] - workspace2[5];
+ out[3 * 8] = workspace2[1] + workspace2[5];
+ out[4 * 8] = workspace2[2] + workspace2[6];
+ out[5 * 8] = workspace2[2] - workspace2[6];
+ out[6 * 8] = workspace2[3] - workspace2[7];
+ out[7 * 8] = workspace2[3] + workspace2[7];
+
+ for (d = 0; d < 8; d++)
+ out[8 * d] >>= 6;
+ } else {
+ int d;
+
+ out[0 * 8] = workspace2[0] + workspace2[4];
+ out[1 * 8] = workspace2[0] - workspace2[4];
+ out[2 * 8] = workspace2[1] - workspace2[5];
+ out[3 * 8] = workspace2[1] + workspace2[5];
+ out[4 * 8] = workspace2[2] + workspace2[6];
+ out[5 * 8] = workspace2[2] - workspace2[6];
+ out[6 * 8] = workspace2[3] - workspace2[7];
+ out[7 * 8] = workspace2[3] + workspace2[7];
+
+ for (d = 0; d < 8; d++) {
+ out[8 * d] >>= 6;
+ out[8 * d] += 128;
+ }
+ }
+ }
+}
+
+static void fill_encoder_block(const u8 *input, s16 *dst,
+ unsigned int stride, unsigned int input_step)
+{
+ int i, j;
+
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++, input += input_step)
+ *dst++ = *input;
+ input += (stride - 8) * input_step;
+ }
+}
+
+static int var_intra(const s16 *input)
+{
+ int32_t mean = 0;
+ int32_t ret = 0;
+ const s16 *tmp = input;
+ int i;
+
+ for (i = 0; i < 8 * 8; i++, tmp++)
+ mean += *tmp;
+ mean /= 64;
+ tmp = input;
+ for (i = 0; i < 8 * 8; i++, tmp++)
+ ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
+ return ret;
+}
+
+static int var_inter(const s16 *old, const s16 *new)
+{
+ int32_t ret = 0;
+ int i;
+
+ for (i = 0; i < 8 * 8; i++, old++, new++)
+ ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
+ return ret;
+}
+
+static int decide_blocktype(const u8 *cur, const u8 *reference,
+ s16 *deltablock, unsigned int stride,
+ unsigned int input_step)
+{
+ s16 tmp[64];
+ s16 old[64];
+ s16 *work = tmp;
+ unsigned int k, l;
+ int vari;
+ int vard;
+
+ fill_encoder_block(cur, tmp, stride, input_step);
+ fill_encoder_block(reference, old, 8, 1);
+ vari = var_intra(tmp);
+
+ for (k = 0; k < 8; k++) {
+ for (l = 0; l < 8; l++) {
+ *deltablock = *work - *reference;
+ deltablock++;
+ work++;
+ reference++;
+ }
+ }
+ deltablock -= 64;
+ vard = var_inter(old, tmp);
+ return vari <= vard ? IBLOCK : PBLOCK;
+}
+
+static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
+{
+ int i, j;
+
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++, input++, dst++) {
+ if (*input < 0)
+ *dst = 0;
+ else if (*input > 255)
+ *dst = 255;
+ else
+ *dst = *input;
+ }
+ dst += stride - 8;
+ }
+}
+
+static void add_deltas(s16 *deltas, const u8 *ref, int stride)
+{
+ int k, l;
+
+ for (k = 0; k < 8; k++) {
+ for (l = 0; l < 8; l++) {
+ *deltas += *ref++;
+ /*
+ * Due to quantizing, it might possible that the
+ * decoded coefficients are slightly out of range
+ */
+ if (*deltas < 0)
+ *deltas = 0;
+ else if (*deltas > 255)
+ *deltas = 255;
+ deltas++;
+ }
+ ref += stride - 8;
+ }
+}
+
+static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
+ struct fwht_cframe *cf, u32 height, u32 width,
+ unsigned int input_step,
+ bool is_intra, bool next_is_intra)
+{
+ u8 *input_start = input;
+ __be16 *rlco_start = *rlco;
+ s16 deltablock[64];
+ __be16 pframe_bit = htons(PFRAME_BIT);
+ u32 encoding = 0;
+ unsigned int last_size = 0;
+ unsigned int i, j;
+
+ for (j = 0; j < height / 8; j++) {
+ for (i = 0; i < width / 8; i++) {
+ /* intra code, first frame is always intra coded. */
+ int blocktype = IBLOCK;
+ unsigned int size;
+
+ if (!is_intra)
+ blocktype = decide_blocktype(input, refp,
+ deltablock, width, input_step);
+ if (blocktype == IBLOCK) {
+ fwht(input, cf->coeffs, width, input_step, 1);
+ quantize_intra(cf->coeffs, cf->de_coeffs,
+ cf->i_frame_qp);
+ } else {
+ /* inter code */
+ encoding |= FWHT_FRAME_PCODED;
+ fwht16(deltablock, cf->coeffs, 8, 0);
+ quantize_inter(cf->coeffs, cf->de_coeffs,
+ cf->p_frame_qp);
+ }
+ if (!next_is_intra) {
+ ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
+
+ if (blocktype == PBLOCK)
+ add_deltas(cf->de_fwht, refp, 8);
+ fill_decoder_block(refp, cf->de_fwht, 8);
+ }
+
+ input += 8 * input_step;
+ refp += 8 * 8;
+
+ size = rlc(cf->coeffs, *rlco, blocktype);
+ if (last_size == size &&
+ !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
+ __be16 *last_rlco = *rlco - size;
+ s16 hdr = ntohs(*last_rlco);
+
+ if (!((*last_rlco ^ **rlco) & pframe_bit) &&
+ (hdr & DUPS_MASK) < DUPS_MASK)
+ *last_rlco = htons(hdr + 2);
+ else
+ *rlco += size;
+ } else {
+ *rlco += size;
+ }
+ if (*rlco >= rlco_max) {
+ encoding |= FWHT_FRAME_UNENCODED;
+ goto exit_loop;
+ }
+ last_size = size;
+ }
+ input += width * 7 * input_step;
+ }
+
+exit_loop:
+ if (encoding & FWHT_FRAME_UNENCODED) {
+ u8 *out = (u8 *)rlco_start;
+
+ input = input_start;
+ /*
+ * The compressed stream should never contain the magic
+ * header, so when we copy the YUV data we replace 0xff
+ * by 0xfe. Since YUV is limited range such values
+ * shouldn't appear anyway.
+ */
+ for (i = 0; i < height * width; i++, input += input_step)
+ *out++ = (*input == 0xff) ? 0xfe : *input;
+ *rlco = (__be16 *)out;
+ encoding &= ~FWHT_FRAME_PCODED;
+ }
+ return encoding;
+}
+
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+ struct fwht_raw_frame *ref_frm,
+ struct fwht_cframe *cf,
+ bool is_intra, bool next_is_intra)
+{
+ unsigned int size = frm->height * frm->width;
+ __be16 *rlco = cf->rlc_data;
+ __be16 *rlco_max;
+ u32 encoding;
+ u32 chroma_h = frm->height / frm->height_div;
+ u32 chroma_w = frm->width / frm->width_div;
+ unsigned int chroma_size = chroma_h * chroma_w;
+
+ rlco_max = rlco + size / 2 - 256;
+ encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
+ frm->height, frm->width,
+ frm->luma_step, is_intra, next_is_intra);
+ if (encoding & FWHT_FRAME_UNENCODED)
+ encoding |= FWHT_LUMA_UNENCODED;
+ encoding &= ~FWHT_FRAME_UNENCODED;
+ rlco_max = rlco + chroma_size / 2 - 256;
+ encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
+ chroma_h, chroma_w,
+ frm->chroma_step, is_intra, next_is_intra);
+ if (encoding & FWHT_FRAME_UNENCODED)
+ encoding |= FWHT_CB_UNENCODED;
+ encoding &= ~FWHT_FRAME_UNENCODED;
+ rlco_max = rlco + chroma_size / 2 - 256;
+ encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
+ chroma_h, chroma_w,
+ frm->chroma_step, is_intra, next_is_intra);
+ if (encoding & FWHT_FRAME_UNENCODED)
+ encoding |= FWHT_CR_UNENCODED;
+ encoding &= ~FWHT_FRAME_UNENCODED;
+ cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
+ return encoding;
+}
+
+static void decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
+ u32 height, u32 width, bool uncompressed)
+{
+ unsigned int copies = 0;
+ s16 copy[8 * 8];
+ s16 stat;
+ unsigned int i, j;
+
+ if (uncompressed) {
+ memcpy(ref, *rlco, width * height);
+ *rlco += width * height / 2;
+ return;
+ }
+
+ /*
+ * When decoding each macroblock the rlco pointer will be increased
+ * by 65 * 2 bytes worst-case.
+ * To avoid overflow the buffer has to be 65/64th of the actual raw
+ * image size, just in case someone feeds it malicious data.
+ */
+ for (j = 0; j < height / 8; j++) {
+ for (i = 0; i < width / 8; i++) {
+ u8 *refp = ref + j * 8 * width + i * 8;
+
+ if (copies) {
+ memcpy(cf->de_fwht, copy, sizeof(copy));
+ if (stat & PFRAME_BIT)
+ add_deltas(cf->de_fwht, refp, width);
+ fill_decoder_block(refp, cf->de_fwht, width);
+ copies--;
+ continue;
+ }
+
+ stat = derlc(rlco, cf->coeffs);
+
+ if (stat & PFRAME_BIT)
+ dequantize_inter(cf->coeffs);
+ else
+ dequantize_intra(cf->coeffs);
+
+ ifwht(cf->coeffs, cf->de_fwht,
+ (stat & PFRAME_BIT) ? 0 : 1);
+
+ copies = (stat & DUPS_MASK) >> 1;
+ if (copies)
+ memcpy(copy, cf->de_fwht, sizeof(copy));
+ if (stat & PFRAME_BIT)
+ add_deltas(cf->de_fwht, refp, width);
+ fill_decoder_block(refp, cf->de_fwht, width);
+ }
+ }
+}
+
+void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+ u32 hdr_flags)
+{
+ const __be16 *rlco = cf->rlc_data;
+ u32 h = cf->height / 2;
+ u32 w = cf->width / 2;
+
+ if (hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT)
+ h *= 2;
+ if (hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH)
+ w *= 2;
+ decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
+ hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED);
+ decode_plane(cf, &rlco, ref->cb, h, w,
+ hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED);
+ decode_plane(cf, &rlco, ref->cr, h, w,
+ hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED);
+}
diff --git a/utils/common/codec-fwht.h b/utils/common/codec-fwht.h
new file mode 100644
index 00000000..03d678ec
--- /dev/null
+++ b/utils/common/codec-fwht.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2016 Tom aan de Wiel
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ */
+
+#ifndef CODEC_FWHT_H
+#define CODEC_FWHT_H
+
+#include <linux/types.h>
+#include <string.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#define BIT(x) (1 << (x))
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __s16 s16;
+typedef __s32 s32;
+typedef __u8 u8;
+
+/*
+ * The compressed format consists of a fwht_cframe_hdr struct followed by the
+ * compressed frame data. The header contains the size of that data.
+ * Each Y, Cb and Cr plane is compressed separately. If the compressed
+ * size of each plane becomes larger than the uncompressed size, then
+ * that plane is stored uncompressed and the corresponding bit is set
+ * in the flags field of the header.
+ *
+ * Each compressed plane consists of macroblocks and each macroblock
+ * is run-length-encoded. Each macroblock starts with a 16 bit value.
+ * Bit 15 indicates if this is a P-coded macroblock (1) or not (0).
+ * P-coded macroblocks contain a delta against the previous frame.
+ *
+ * Bits 1-12 contain a number. If non-zero, then this same macroblock
+ * repeats that number of times. This results in a high degree of
+ * compression for generated images like colorbars.
+ *
+ * Following this macroblock header the MB coefficients are run-length
+ * encoded: the top 12 bits contain the coefficient, the bottom 4 bits
+ * tell how many times this coefficient occurs. The value 0xf indicates
+ * that the remainder of the macroblock should be filled with zeroes.
+ *
+ * All 16 and 32 bit values are stored in big-endian (network) order.
+ *
+ * Each fwht_cframe_hdr starts with an 8 byte magic header that is
+ * guaranteed not to occur in the compressed frame data. This header
+ * can be used to sync to the next frame.
+ *
+ * This codec uses the Fast Walsh Hadamard Transform. Tom aan de Wiel
+ * developed this as part of a university project, specifically for use
+ * with this driver. His project report can be found here:
+ *
+ * https://hverkuil.home.xs4all.nl/fwht.pdf
+ */
+
+/*
+ * This is a sequence of 8 bytes with the low 4 bits set to 0xf.
+ *
+ * This sequence cannot occur in the encoded data
+ *
+ * Note that these two magic values are symmetrical so endian issues here.
+ */
+#define FWHT_MAGIC1 0x4f4f4f4f
+#define FWHT_MAGIC2 0xffffffff
+
+#define FWHT_VERSION 1
+
+/* Set if this is an interlaced format */
+#define FWHT_FL_IS_INTERLACED BIT(0)
+/* Set if this is a bottom-first (NTSC) interlaced format */
+#define FWHT_FL_IS_BOTTOM_FIRST BIT(1)
+/* Set if each 'frame' contains just one field */
+#define FWHT_FL_IS_ALTERNATE BIT(2)
+/*
+ * If FWHT_FL_IS_ALTERNATE was set, then this is set if this
+ * 'frame' is the bottom field, else it is the top field.
+ */
+#define FWHT_FL_IS_BOTTOM_FIELD BIT(3)
+/* Set if this frame is uncompressed */
+#define FWHT_FL_LUMA_IS_UNCOMPRESSED BIT(4)
+#define FWHT_FL_CB_IS_UNCOMPRESSED BIT(5)
+#define FWHT_FL_CR_IS_UNCOMPRESSED BIT(6)
+#define FWHT_FL_CHROMA_FULL_HEIGHT BIT(7)
+#define FWHT_FL_CHROMA_FULL_WIDTH BIT(8)
+
+struct fwht_cframe_hdr {
+ u32 magic1;
+ u32 magic2;
+ __be32 version;
+ __be32 width, height;
+ __be32 flags;
+ __be32 colorspace;
+ __be32 xfer_func;
+ __be32 ycbcr_enc;
+ __be32 quantization;
+ __be32 size;
+};
+
+struct fwht_cframe {
+ unsigned int width, height;
+ u16 i_frame_qp;
+ u16 p_frame_qp;
+ __be16 *rlc_data;
+ s16 coeffs[8 * 8];
+ s16 de_coeffs[8 * 8];
+ s16 de_fwht[8 * 8];
+ u32 size;
+};
+
+struct fwht_raw_frame {
+ unsigned int width, height;
+ unsigned int width_div;
+ unsigned int height_div;
+ unsigned int luma_step;
+ unsigned int chroma_step;
+ u8 *luma, *cb, *cr;
+};
+
+#define FWHT_FRAME_PCODED BIT(0)
+#define FWHT_FRAME_UNENCODED BIT(1)
+#define FWHT_LUMA_UNENCODED BIT(2)
+#define FWHT_CB_UNENCODED BIT(3)
+#define FWHT_CR_UNENCODED BIT(4)
+
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+ struct fwht_raw_frame *ref_frm,
+ struct fwht_cframe *cf,
+ bool is_intra, bool next_is_intra);
+void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+ u32 hdr_flags);
+
+#endif
diff --git a/utils/common/codec-fwht.patch b/utils/common/codec-fwht.patch
new file mode 100644
index 00000000..95606bfd
--- /dev/null
+++ b/utils/common/codec-fwht.patch
@@ -0,0 +1,27 @@
+diff --git a/utils/common/codec-fwht.h b/utils/common/codec-fwht.h
+index cdfad133..51fd147c 100644
+--- a/utils/common/codec-fwht.h
++++ b/utils/common/codec-fwht.h
+@@ -8,8 +8,20 @@
+ #define CODEC_FWHT_H
+
+ #include <linux/types.h>
+-#include <linux/bitops.h>
+-#include <asm/byteorder.h>
++#include <string.h>
++#include <errno.h>
++#include <arpa/inet.h>
++#include <stdbool.h>
++#include <stdint.h>
++
++#define BIT(x) (1 << (x))
++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
++
++typedef __u32 u32;
++typedef __u16 u16;
++typedef __s16 s16;
++typedef __s32 s32;
++typedef __u8 u8;
+
+ /*
+ * The compressed format consists of a fwht_cframe_hdr struct followed by the
diff --git a/utils/common/codec-v4l2-fwht.c b/utils/common/codec-v4l2-fwht.c
new file mode 100644
index 00000000..cfcf84b8
--- /dev/null
+++ b/utils/common/codec-v4l2-fwht.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A V4L2 frontend for the FWHT codec
+ *
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/videodev2.h>
+#include "codec-v4l2-fwht.h"
+
+static const struct v4l2_fwht_pixfmt_info v4l2_fwht_pixfmts[] = {
+ { V4L2_PIX_FMT_YUV420, 1, 3, 2, 1, 1, 2, 2 },
+ { V4L2_PIX_FMT_YVU420, 1, 3, 2, 1, 1, 2, 2 },
+ { V4L2_PIX_FMT_YUV422P, 1, 2, 1, 1, 1, 2, 1 },
+ { V4L2_PIX_FMT_NV12, 1, 3, 2, 1, 2, 2, 2 },
+ { V4L2_PIX_FMT_NV21, 1, 3, 2, 1, 2, 2, 2 },
+ { V4L2_PIX_FMT_NV16, 1, 2, 1, 1, 2, 2, 1 },
+ { V4L2_PIX_FMT_NV61, 1, 2, 1, 1, 2, 2, 1 },
+ { V4L2_PIX_FMT_NV24, 1, 3, 1, 1, 2, 1, 1 },
+ { V4L2_PIX_FMT_NV42, 1, 3, 1, 1, 2, 1, 1 },
+ { V4L2_PIX_FMT_YUYV, 2, 2, 1, 2, 4, 2, 1 },
+ { V4L2_PIX_FMT_YVYU, 2, 2, 1, 2, 4, 2, 1 },
+ { V4L2_PIX_FMT_UYVY, 2, 2, 1, 2, 4, 2, 1 },
+ { V4L2_PIX_FMT_VYUY, 2, 2, 1, 2, 4, 2, 1 },
+ { V4L2_PIX_FMT_BGR24, 3, 3, 1, 3, 3, 1, 1 },
+ { V4L2_PIX_FMT_RGB24, 3, 3, 1, 3, 3, 1, 1 },
+ { V4L2_PIX_FMT_HSV24, 3, 3, 1, 3, 3, 1, 1 },
+ { V4L2_PIX_FMT_BGR32, 4, 4, 1, 4, 4, 1, 1 },
+ { V4L2_PIX_FMT_XBGR32, 4, 4, 1, 4, 4, 1, 1 },
+ { V4L2_PIX_FMT_RGB32, 4, 4, 1, 4, 4, 1, 1 },
+ { V4L2_PIX_FMT_XRGB32, 4, 4, 1, 4, 4, 1, 1 },
+ { V4L2_PIX_FMT_HSV32, 4, 4, 1, 4, 4, 1, 1 },
+};
+
+const struct v4l2_fwht_pixfmt_info *v4l2_fwht_find_pixfmt(u32 pixelformat)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(v4l2_fwht_pixfmts); i++)
+ if (v4l2_fwht_pixfmts[i].id == pixelformat)
+ return v4l2_fwht_pixfmts + i;
+ return NULL;
+}
+
+const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx)
+{
+ if (idx >= ARRAY_SIZE(v4l2_fwht_pixfmts))
+ return NULL;
+ return v4l2_fwht_pixfmts + idx;
+}
+
+unsigned int v4l2_fwht_encode(struct v4l2_fwht_state *state,
+ u8 *p_in, u8 *p_out)
+{
+ unsigned int size = state->width * state->height;
+ const struct v4l2_fwht_pixfmt_info *info = state->info;
+ struct fwht_cframe_hdr *p_hdr;
+ struct fwht_cframe cf;
+ struct fwht_raw_frame rf;
+ u32 encoding;
+ u32 flags = 0;
+
+ rf.width = state->width;
+ rf.height = state->height;
+ rf.luma = p_in;
+ rf.width_div = info->width_div;
+ rf.height_div = info->height_div;
+ rf.luma_step = info->luma_step;
+ rf.chroma_step = info->chroma_step;
+
+ switch (info->id) {
+ case V4L2_PIX_FMT_YUV420:
+ rf.cb = rf.luma + size;
+ rf.cr = rf.cb + size / 4;
+ break;
+ case V4L2_PIX_FMT_YVU420:
+ rf.cr = rf.luma + size;
+ rf.cb = rf.cr + size / 4;
+ break;
+ case V4L2_PIX_FMT_YUV422P:
+ rf.cb = rf.luma + size;
+ rf.cr = rf.cb + size / 2;
+ break;
+ case V4L2_PIX_FMT_NV12:
+ case V4L2_PIX_FMT_NV16:
+ case V4L2_PIX_FMT_NV24:
+ rf.cb = rf.luma + size;
+ rf.cr = rf.cb + 1;
+ break;
+ case V4L2_PIX_FMT_NV21:
+ case V4L2_PIX_FMT_NV61:
+ case V4L2_PIX_FMT_NV42:
+ rf.cr = rf.luma + size;
+ rf.cb = rf.cr + 1;
+ break;
+ case V4L2_PIX_FMT_YUYV:
+ rf.cb = rf.luma + 1;
+ rf.cr = rf.cb + 2;
+ break;
+ case V4L2_PIX_FMT_YVYU:
+ rf.cr = rf.luma + 1;
+ rf.cb = rf.cr + 2;
+ break;
+ case V4L2_PIX_FMT_UYVY:
+ rf.cb = rf.luma;
+ rf.cr = rf.cb + 2;
+ rf.luma++;
+ break;
+ case V4L2_PIX_FMT_VYUY:
+ rf.cr = rf.luma;
+ rf.cb = rf.cr + 2;
+ rf.luma++;
+ break;
+ case V4L2_PIX_FMT_RGB24:
+ case V4L2_PIX_FMT_HSV24:
+ rf.cr = rf.luma;
+ rf.cb = rf.cr + 2;
+ rf.luma++;
+ break;
+ case V4L2_PIX_FMT_BGR24:
+ rf.cb = rf.luma;
+ rf.cr = rf.cb + 2;
+ rf.luma++;
+ break;
+ case V4L2_PIX_FMT_RGB32:
+ case V4L2_PIX_FMT_XRGB32:
+ case V4L2_PIX_FMT_HSV32:
+ rf.cr = rf.luma + 1;
+ rf.cb = rf.cr + 2;
+ rf.luma += 2;
+ break;
+ case V4L2_PIX_FMT_BGR32:
+ case V4L2_PIX_FMT_XBGR32:
+ rf.cb = rf.luma;
+ rf.cr = rf.cb + 2;
+ rf.luma++;
+ break;
+ }
+
+ cf.width = state->width;
+ cf.height = state->height;
+ cf.i_frame_qp = state->i_frame_qp;
+ cf.p_frame_qp = state->p_frame_qp;
+ cf.rlc_data = (__be16 *)(p_out + sizeof(*p_hdr));
+
+ encoding = fwht_encode_frame(&rf, &state->ref_frame, &cf,
+ !state->gop_cnt,
+ state->gop_cnt == state->gop_size - 1);
+ if (!(encoding & FWHT_FRAME_PCODED))
+ state->gop_cnt = 0;
+ if (++state->gop_cnt >= state->gop_size)
+ state->gop_cnt = 0;
+
+ p_hdr = (struct fwht_cframe_hdr *)p_out;
+ p_hdr->magic1 = FWHT_MAGIC1;
+ p_hdr->magic2 = FWHT_MAGIC2;
+ p_hdr->version = htonl(FWHT_VERSION);
+ p_hdr->width = htonl(cf.width);
+ p_hdr->height = htonl(cf.height);
+ if (encoding & FWHT_LUMA_UNENCODED)
+ flags |= FWHT_FL_LUMA_IS_UNCOMPRESSED;
+ if (encoding & FWHT_CB_UNENCODED)
+ flags |= FWHT_FL_CB_IS_UNCOMPRESSED;
+ if (encoding & FWHT_CR_UNENCODED)
+ flags |= FWHT_FL_CR_IS_UNCOMPRESSED;
+ if (rf.height_div == 1)
+ flags |= FWHT_FL_CHROMA_FULL_HEIGHT;
+ if (rf.width_div == 1)
+ flags |= FWHT_FL_CHROMA_FULL_WIDTH;
+ p_hdr->flags = htonl(flags);
+ p_hdr->colorspace = htonl(state->colorspace);
+ p_hdr->xfer_func = htonl(state->xfer_func);
+ p_hdr->ycbcr_enc = htonl(state->ycbcr_enc);
+ p_hdr->quantization = htonl(state->quantization);
+ p_hdr->size = htonl(cf.size);
+ state->ref_frame.width = cf.width;
+ state->ref_frame.height = cf.height;
+ return cf.size + sizeof(*p_hdr);
+}
+
+int v4l2_fwht_decode(struct v4l2_fwht_state *state,
+ u8 *p_in, u8 *p_out)
+{
+ unsigned int size = state->width * state->height;
+ unsigned int chroma_size = size;
+ unsigned int i;
+ u32 flags;
+ struct fwht_cframe_hdr *p_hdr;
+ struct fwht_cframe cf;
+ u8 *p;
+
+ p_hdr = (struct fwht_cframe_hdr *)p_in;
+ cf.width = ntohl(p_hdr->width);
+ cf.height = ntohl(p_hdr->height);
+ flags = ntohl(p_hdr->flags);
+ state->colorspace = ntohl(p_hdr->colorspace);
+ state->xfer_func = ntohl(p_hdr->xfer_func);
+ state->ycbcr_enc = ntohl(p_hdr->ycbcr_enc);
+ state->quantization = ntohl(p_hdr->quantization);
+ cf.rlc_data = (__be16 *)(p_in + sizeof(*p_hdr));
+
+ if (p_hdr->magic1 != FWHT_MAGIC1 ||
+ p_hdr->magic2 != FWHT_MAGIC2 ||
+ ntohl(p_hdr->version) != FWHT_VERSION ||
+ (cf.width & 7) || (cf.height & 7))
+ return -EINVAL;
+
+ /* TODO: support resolution changes */
+ if (cf.width != state->width || cf.height != state->height)
+ return -EINVAL;
+
+ if (!(flags & FWHT_FL_CHROMA_FULL_WIDTH))
+ chroma_size /= 2;
+ if (!(flags & FWHT_FL_CHROMA_FULL_HEIGHT))
+ chroma_size /= 2;
+
+ fwht_decode_frame(&cf, &state->ref_frame, flags);
+
+ switch (state->info->id) {
+ case V4L2_PIX_FMT_YUV420:
+ case V4L2_PIX_FMT_YUV422P:
+ memcpy(p_out, state->ref_frame.luma, size);
+ p_out += size;
+ memcpy(p_out, state->ref_frame.cb, chroma_size);
+ p_out += chroma_size;
+ memcpy(p_out, state->ref_frame.cr, chroma_size);
+ break;
+ case V4L2_PIX_FMT_YVU420:
+ memcpy(p_out, state->ref_frame.luma, size);
+ p_out += size;
+ memcpy(p_out, state->ref_frame.cr, chroma_size);
+ p_out += chroma_size;
+ memcpy(p_out, state->ref_frame.cb, chroma_size);
+ break;
+ case V4L2_PIX_FMT_NV12:
+ case V4L2_PIX_FMT_NV16:
+ case V4L2_PIX_FMT_NV24:
+ memcpy(p_out, state->ref_frame.luma, size);
+ p_out += size;
+ for (i = 0, p = p_out; i < chroma_size; i++) {
+ *p++ = state->ref_frame.cb[i];
+ *p++ = state->ref_frame.cr[i];
+ }
+ break;
+ case V4L2_PIX_FMT_NV21:
+ case V4L2_PIX_FMT_NV61:
+ case V4L2_PIX_FMT_NV42:
+ memcpy(p_out, state->ref_frame.luma, size);
+ p_out += size;
+ for (i = 0, p = p_out; i < chroma_size; i++) {
+ *p++ = state->ref_frame.cr[i];
+ *p++ = state->ref_frame.cb[i];
+ }
+ break;
+ case V4L2_PIX_FMT_YUYV:
+ for (i = 0, p = p_out; i < size; i += 2) {
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cb[i / 2];
+ *p++ = state->ref_frame.luma[i + 1];
+ *p++ = state->ref_frame.cr[i / 2];
+ }
+ break;
+ case V4L2_PIX_FMT_YVYU:
+ for (i = 0, p = p_out; i < size; i += 2) {
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cr[i / 2];
+ *p++ = state->ref_frame.luma[i + 1];
+ *p++ = state->ref_frame.cb[i / 2];
+ }
+ break;
+ case V4L2_PIX_FMT_UYVY:
+ for (i = 0, p = p_out; i < size; i += 2) {
+ *p++ = state->ref_frame.cb[i / 2];
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cr[i / 2];
+ *p++ = state->ref_frame.luma[i + 1];
+ }
+ break;
+ case V4L2_PIX_FMT_VYUY:
+ for (i = 0, p = p_out; i < size; i += 2) {
+ *p++ = state->ref_frame.cr[i / 2];
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cb[i / 2];
+ *p++ = state->ref_frame.luma[i + 1];
+ }
+ break;
+ case V4L2_PIX_FMT_RGB24:
+ case V4L2_PIX_FMT_HSV24:
+ for (i = 0, p = p_out; i < size; i++) {
+ *p++ = state->ref_frame.cr[i];
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cb[i];
+ }
+ break;
+ case V4L2_PIX_FMT_BGR24:
+ for (i = 0, p = p_out; i < size; i++) {
+ *p++ = state->ref_frame.cb[i];
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cr[i];
+ }
+ break;
+ case V4L2_PIX_FMT_RGB32:
+ case V4L2_PIX_FMT_XRGB32:
+ case V4L2_PIX_FMT_HSV32:
+ for (i = 0, p = p_out; i < size; i++) {
+ *p++ = 0;
+ *p++ = state->ref_frame.cr[i];
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cb[i];
+ }
+ break;
+ case V4L2_PIX_FMT_BGR32:
+ case V4L2_PIX_FMT_XBGR32:
+ for (i = 0, p = p_out; i < size; i++) {
+ *p++ = state->ref_frame.cb[i];
+ *p++ = state->ref_frame.luma[i];
+ *p++ = state->ref_frame.cr[i];
+ *p++ = 0;
+ }
+ break;
+ }
+ return 0;
+}
diff --git a/utils/common/codec-v4l2-fwht.h b/utils/common/codec-v4l2-fwht.h
new file mode 100644
index 00000000..7794c186
--- /dev/null
+++ b/utils/common/codec-v4l2-fwht.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ */
+
+#ifndef CODEC_V4L2_FWHT_H
+#define CODEC_V4L2_FWHT_H
+
+#include "codec-fwht.h"
+
+struct v4l2_fwht_pixfmt_info {
+ u32 id;
+ unsigned int bytesperline_mult;
+ unsigned int sizeimage_mult;
+ unsigned int sizeimage_div;
+ unsigned int luma_step;
+ unsigned int chroma_step;
+ /* Chroma plane subsampling */
+ unsigned int width_div;
+ unsigned int height_div;
+};
+
+struct v4l2_fwht_state {
+ const struct v4l2_fwht_pixfmt_info *info;
+ unsigned int width;
+ unsigned int height;
+ unsigned int gop_size;
+ unsigned int gop_cnt;
+ u16 i_frame_qp;
+ u16 p_frame_qp;
+
+ enum v4l2_colorspace colorspace;
+ enum v4l2_ycbcr_encoding ycbcr_enc;
+ enum v4l2_xfer_func xfer_func;
+ enum v4l2_quantization quantization;
+
+ struct fwht_raw_frame ref_frame;
+ u8 *compressed_frame;
+};
+
+const struct v4l2_fwht_pixfmt_info *v4l2_fwht_find_pixfmt(u32 pixelformat);
+const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx);
+
+unsigned int v4l2_fwht_encode(struct v4l2_fwht_state *state,
+ u8 *p_in, u8 *p_out);
+
+int v4l2_fwht_decode(struct v4l2_fwht_state *state,
+ u8 *p_in, u8 *p_out);
+
+#endif

Privacy Policy