/* SPDX-License-Identifier: Apache 2.0 */
/* Copyright 2023 - Present, Light Transport Entertainment Inc. */
/* TODO:
 *
 * - [ ] Stream decoding API
 * - [ ] Stream encoding API
 *
 */
#ifndef NANOZLIB_H_
#define NANOZLIB_H_

#include <stddef.h> /* size_t */
#include <stdint.h> /* int32_t, uint64_t, ... */

#ifdef __cplusplus
extern "C" {
#endif

typedef enum nanoz_status {
  NANOZ_SUCCESS = 0,
  NANOZ_ERROR = -1, /* general error code. */
  NANOZ_ERROR_INVALID_ARGUMENT = -2,
  NANOZ_ERROR_CORRUPTED = -3,
  NANOZ_ERROR_INTERNAL = -4
} nanoz_status_t;

#if 0 /* TODO */
/* Up to 2GB chunk. */
typedef (*nanoz_stream_read)(const uint8_t *addr, uint8_t *dst_addr,
                             const uint32_t read_bytes, const void *user_ptr);

typedef (*nanoz_stream_write)(const uint8_t *addr, const uint32_t write_bytes,
                              const void *user_ptr);
#endif

/* TODO: Get uncompressed size function */

/*
 * zlib decompression. Up to 2GB compressed data.
 *
 * @param[in] src_addr Source buffer address containing compressed data.
 * @param[in] src_size Source buffer bytes.
 * @param[in] dst_size Destination buffer size. Must be larger than or equal
 *            to the uncompressed size.
 * @param[out] dst_addr Destination buffer address. Receives up to
 *             `uncompressed_size` bytes.
 * @param[out] uncompressed_size Uncompressed bytes.
 * @return NANOZ_SUCCESS upon success.
 *
 * TODO: return error message string.
 */
nanoz_status_t nanoz_uncompress(const unsigned char *src_addr,
                                int32_t src_size, const uint64_t dst_size,
                                unsigned char *dst_addr,
                                uint64_t *uncompressed_size);

/*
 * Compute compress bound: the maximum compressed size for `sourceLen`
 * bytes of input.
 */
uint64_t nanoz_compressBound(uint64_t sourceLen);

/*
 * zlib compression. Currently we use stb's zlib_compress.
 *
 * @param[in] data Input data.
 * @param[in] data_len Input data bytes (up to 2GB).
 * @param[out] out_len Compressed data bytes.
 * @param[in] quality Compression quality (5 or more. Usually 8).
 *
 * @return Pointer to compressed bytes upon success (caller frees with
 *         NANOZ_FREE/free). NULL when failed to compress or any input
 *         parameter is wrong.
 */
unsigned char *nanoz_compress(unsigned char *data, int data_len, int *out_len,
                              int quality);

#if 0 /* TODO */
nanoz_status_t nanoz_stream_uncompress(nanoz_stream_read *reader,
                                       nanoz_stream_writer *writer);
#endif

#ifdef __cplusplus
}
#endif

#if defined(NANOZLIB_IMPLEMENTATION)

#include <stdlib.h> /* malloc/realloc/free */
#include <string.h> /* memmove/memcpy */

#define WUFFS_IMPLEMENTATION
#define WUFFS_CONFIG__STATIC_FUNCTIONS
#define WUFFS_CONFIG__MODULES
#define WUFFS_CONFIG__MODULE__BASE
#define WUFFS_CONFIG__MODULE__CRC32
#define WUFFS_CONFIG__MODULE__ADLER32
#define WUFFS_CONFIG__MODULE__DEFLATE
#define WUFFS_CONFIG__MODULE__ZLIB

#include "wuffs-v0.3.c"

#define WORK_BUFFER_ARRAY_SIZE \
  WUFFS_ZLIB__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE

nanoz_status_t nanoz_uncompress(const unsigned char *src_addr,
                                const int32_t src_size,
                                const uint64_t dst_size,
                                unsigned char *dst_addr,
                                uint64_t *uncompressed_size_out) {
  /* WUFFS_ZLIB__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE = 1. Its a tiny
   * number of bytes, so it is safe to place the work buffer on the stack. */
#if WORK_BUFFER_ARRAY_SIZE > 0
  uint8_t work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
#else
  /* Not all C/C++ compilers support 0-length arrays. */
  uint8_t work_buffer_array[1];
#endif

  if (!src_addr) {
    return NANOZ_ERROR_INVALID_ARGUMENT;
  }

  /* A valid zlib stream is at least 2 bytes of header + 4 bytes of adler32. */
  if (src_size < 4) {
    return NANOZ_ERROR_INVALID_ARGUMENT;
  }

  if (!dst_addr) {
    return NANOZ_ERROR_INVALID_ARGUMENT;
  }

  if (dst_size < 1) {
    return NANOZ_ERROR_INVALID_ARGUMENT;
  }

  if (!uncompressed_size_out) {
    return NANOZ_ERROR_INVALID_ARGUMENT;
  }

  wuffs_zlib__decoder dec;
  wuffs_base__status status =
      wuffs_zlib__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
  if (!wuffs_base__status__is_ok(&status)) {
    /* wuffs_base__status__message(&status); */
    return NANOZ_ERROR_INTERNAL;
  }

  /* TODO: Streamed decoding? */
  wuffs_base__io_buffer dst;
  dst.data.ptr = dst_addr;
  dst.data.len = dst_size;
  dst.meta.wi = 0;
  dst.meta.ri = 0;
  dst.meta.pos = 0;
  dst.meta.closed = false;

  wuffs_base__io_buffer src;
  /* Cast away const: wuffs only reads from the source buffer. */
  src.data.ptr = (uint8_t *)src_addr;
  src.data.len = (size_t)src_size;
  src.meta.wi = (size_t)src_size;
  src.meta.ri = 0;
  src.meta.pos = 0;
  src.meta.closed = false;

  status = wuffs_zlib__decoder__transform_io(
      &dec, &dst, &src,
      wuffs_base__make_slice_u8(work_buffer_array, WORK_BUFFER_ARRAY_SIZE));

  uint64_t uncompressed_size = 0;
  if (dst.meta.wi) {
    dst.meta.ri = dst.meta.wi;
    uncompressed_size = dst.meta.wi;
    wuffs_base__io_buffer__compact(&dst);
  }

  if (status.repr == wuffs_base__suspension__short_read) {
    /* ok */
  } else if (status.repr == wuffs_base__suspension__short_write) {
    /* read & write should succeed at once. */
    return NANOZ_ERROR_CORRUPTED;
  }

  const char *stat_msg = wuffs_base__status__message(&status);
  if (stat_msg) {
    return NANOZ_ERROR_INTERNAL;
  }

  (*uncompressed_size_out) = uncompressed_size;

  return NANOZ_SUCCESS;
}

#ifndef NANOZ_MALLOC
#define NANOZ_MALLOC(sz) malloc(sz)
#define NANOZ_REALLOC(p, newsz) realloc(p, newsz)
#define NANOZ_FREE(p) free(p)
#endif

#ifndef NANOZ_REALLOC_SIZED
#define NANOZ_REALLOC_SIZED(p, oldsz, newsz) NANOZ_REALLOC(p, newsz)
#endif

#ifndef NANOZ_MEMMOVE
#define NANOZ_MEMMOVE(a, b, sz) memmove(a, b, sz)
#endif

#define NANOZ_UCHAR(x) (unsigned char)((x)&0xff)

// #ifndef NANOZ_ZLIB_COMPRESS
// stretchy buffer; nanoz__sbpush() == vector<>::push_back() --
// nanoz__sbcount() == vector<>::size()
// Layout: two ints (capacity, count) stored immediately before the payload.
#define nanoz__sbraw(a) ((int *)(void *)(a)-2)
#define nanoz__sbm(a) nanoz__sbraw(a)[0]
#define nanoz__sbn(a) nanoz__sbraw(a)[1]

#define nanoz__sbneedgrow(a, n) ((a) == 0 || nanoz__sbn(a) + n >= nanoz__sbm(a))
#define nanoz__sbmaybegrow(a, n) \
  (nanoz__sbneedgrow(a, (n)) ? nanoz__sbgrow(a, n) : 0)
#define nanoz__sbgrow(a, n) nanoz__sbgrowf((void **)&(a), (n), sizeof(*(a)))
#define nanoz__sbpush(a, v) \
  (nanoz__sbmaybegrow(a, 1), (a)[nanoz__sbn(a)++] = (v))
#define nanoz__sbcount(a) ((a) ? nanoz__sbn(a) : 0)
#define nanoz__sbfree(a) ((a) ? NANOZ_FREE(nanoz__sbraw(a)), 0 : 0)

// Grow the stretchy buffer to hold `increment` more items of `itemsize`
// bytes; doubles the capacity. Returns the (possibly moved) payload pointer,
// or NULL when the underlying realloc fails (the old buffer stays valid).
static void *nanoz__sbgrowf(void **arr, int increment, int itemsize) {
  int m = *arr ? 2 * nanoz__sbm(*arr) + increment : increment + 1;
  void *p = NANOZ_REALLOC_SIZED(
      *arr ? nanoz__sbraw(*arr) : 0,
      *arr ? (nanoz__sbm(*arr) * itemsize + sizeof(int) * 2) : 0,
      itemsize * m + sizeof(int) * 2);
  if (!p) {
    return NULL;
  }
  if (p) {
    if (!*arr) ((int *)p)[1] = 0;
    *arr = (void *)((int *)p + 2);
    nanoz__sbm(*arr) = m;
  }
  return *arr;
}

// Flush whole bytes from the bit accumulator into the output buffer.
static unsigned char *nanoz__zlib_flushf(unsigned char *data,
                                         unsigned int *bitbuffer,
                                         int *bitcount) {
  while (*bitcount >= 8) {
    nanoz__sbpush(data, NANOZ_UCHAR(*bitbuffer));
    *bitbuffer >>= 8;
    *bitcount -= 8;
  }
  return data;
}

// Reverse the low `codebits` bits of `code` (DEFLATE huffman codes are
// emitted bit-reversed).
static int nanoz__zlib_bitrev(int code, int codebits) {
  int res = 0;
  while (codebits--) {
    res = (res << 1) | (code & 1);
    code >>= 1;
  }
  return res;
}

// Count matching leading bytes of a and b, capped at min(limit, 258)
// (258 is the maximum DEFLATE match length).
static unsigned int nanoz__zlib_countm(unsigned char *a, unsigned char *b,
                                       int limit) {
  int i;
  for (i = 0; i < limit && i < 258; ++i)
    if (a[i] != b[i]) break;
  return i;
}

// Hash the next 3 bytes of input for the match-finder hash table.
static unsigned int nanoz__zhash(unsigned char *data) {
  uint32_t hash = data[0] + (data[1] << 8) + (data[2] << 16);
  hash ^= hash << 3;
  hash += hash >> 5;
  hash ^= hash << 4;
  hash += hash >> 17;
  hash ^= hash << 25;
  hash += hash >> 6;
  return hash;
}

#define nanoz__zlib_flush() (out = nanoz__zlib_flushf(out, &bitbuf, &bitcount))
#define nanoz__zlib_add(code, codebits) \
  (bitbuf |= (code) << bitcount, bitcount += (codebits), nanoz__zlib_flush())
#define nanoz__zlib_huffa(b, c) nanoz__zlib_add(nanoz__zlib_bitrev(b, c), c)
// default huffman tables
#define nanoz__zlib_huff1(n) nanoz__zlib_huffa(0x30 + (n), 8)
#define nanoz__zlib_huff2(n) nanoz__zlib_huffa(0x190 + (n)-144, 9)
#define nanoz__zlib_huff3(n) nanoz__zlib_huffa(0 + (n)-256, 7)
#define nanoz__zlib_huff4(n) nanoz__zlib_huffa(0xc0 + (n)-280, 8)
#define nanoz__zlib_huff(n)                                \
  ((n) <= 143 ? nanoz__zlib_huff1(n)                       \
   : (n) <= 255 ? nanoz__zlib_huff2(n)                     \
   : (n) <= 279 ? nanoz__zlib_huff3(n)                     \
                : nanoz__zlib_huff4(n))
#define nanoz__zlib_huffb(n) \
  ((n) <= 143 ? nanoz__zlib_huff1(n) : nanoz__zlib_huff2(n))

#define nanoz__ZHASH 16384

// #endif // NANOZ_ZLIB_COMPRESS

unsigned char *nanoz_compress(unsigned char *data, int data_len, int *out_len,
                              int quality) {
  // DEFLATE length code base values / extra bits, distance code base
  // values / extra bits (RFC 1951 fixed tables; trailing sentinel entries
  // terminate the linear search below).
  static unsigned short lengthc[] = {
      3,  4,  5,  6,  7,  8,  9,  10, 11,  13,  15,  17,  19,  23, 27,
      31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 259};
  static unsigned char lengtheb[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
                                     1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
                                     4, 4, 4, 4, 5, 5, 5, 5, 0};
  static unsigned short distc[] = {
      1,    2,    3,    4,    5,    7,     9,     13,    17,  25,   33,
      49,   65,   97,   129,  193,  257,   385,   513,   769, 1025, 1537,
      2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32768};
  static unsigned char disteb[] = {0, 0, 0,  0,  1,  1,  2,  2,  3,  3,
                                   4, 4, 5,  5,  6,  6,  7,  7,  8,  8,
                                   9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
  unsigned int bitbuf = 0;
  int i, j, bitcount = 0;
  unsigned char *out = NULL;

  if (!data) {
    return NULL;
  }

  if (data_len < 1) {
    return NULL;
  }

  if (!out_len) {
    return NULL;
  }

  // One stretchy buffer of candidate match positions per hash bucket.
  unsigned char ***hash_table =
      (unsigned char ***)NANOZ_MALLOC(nanoz__ZHASH * sizeof(unsigned char **));
  if (hash_table == NULL) return NULL;
  if (quality < 5) quality = 5;

  nanoz__sbpush(out, 0x78);  // DEFLATE 32K window
  nanoz__sbpush(out, 0x5e);  // FLEVEL = 1
  nanoz__zlib_add(1, 1);     // BFINAL = 1
  nanoz__zlib_add(1, 2);     // BTYPE = 1 -- fixed huffman

  for (i = 0; i < nanoz__ZHASH; ++i) hash_table[i] = NULL;

  i = 0;
  while (i < data_len - 3) {
    // hash next 3 bytes of data to be compressed
    int h = nanoz__zhash(data + i) & (nanoz__ZHASH - 1), best = 3;
    unsigned char *bestloc = 0;
    unsigned char **hlist = hash_table[h];
    int n = nanoz__sbcount(hlist);
    for (j = 0; j < n; ++j) {
      if (hlist[j] - data > i - 32768) {  // if entry lies within window
        int d = nanoz__zlib_countm(hlist[j], data + i, data_len - i);
        if (d >= best) {
          best = d;
          bestloc = hlist[j];
        }
      }
    }
    // when hash table entry is too long, delete half the entries
    if (hash_table[h] && nanoz__sbn(hash_table[h]) == 2 * quality) {
      NANOZ_MEMMOVE(hash_table[h], hash_table[h] + quality,
                    sizeof(hash_table[h][0]) * quality);
      nanoz__sbn(hash_table[h]) = quality;
    }
    nanoz__sbpush(hash_table[h], data + i);

    if (bestloc) {
      // "lazy matching" - check match at *next* byte, and if it's better, do
      // cur byte as literal
      h = nanoz__zhash(data + i + 1) & (nanoz__ZHASH - 1);
      hlist = hash_table[h];
      n = nanoz__sbcount(hlist);
      for (j = 0; j < n; ++j) {
        if (hlist[j] - data > i - 32767) {
          int e = nanoz__zlib_countm(hlist[j], data + i + 1, data_len - i - 1);
          if (e > best) {  // if next match is better, bail on current match
            bestloc = NULL;
            break;
          }
        }
      }
    }

    if (bestloc) {
      int d = (int)(data + i - bestloc);  // distance back
      if (d <= 32767 && best <= 258) {
        // OK
      } else {
        // Should be unreachable given the window/match limits above; clean
        // up everything we allocated before bailing (was a leak).
        for (j = 0; j < nanoz__ZHASH; ++j) (void)nanoz__sbfree(hash_table[j]);
        NANOZ_FREE(hash_table);
        (void)nanoz__sbfree(out);
        return NULL;
      }
      for (j = 0; best > lengthc[j + 1] - 1; ++j)
        ;
      nanoz__zlib_huff(j + 257);
      if (lengtheb[j]) nanoz__zlib_add(best - lengthc[j], lengtheb[j]);
      for (j = 0; d > distc[j + 1] - 1; ++j)
        ;
      nanoz__zlib_add(nanoz__zlib_bitrev(j, 5), 5);
      if (disteb[j]) nanoz__zlib_add(d - distc[j], disteb[j]);
      i += best;
    } else {
      nanoz__zlib_huffb(data[i]);
      ++i;
    }
  }
  // write out final bytes
  for (; i < data_len; ++i) nanoz__zlib_huffb(data[i]);
  nanoz__zlib_huff(256);  // end of block
  // pad with 0 bits to byte boundary
  while (bitcount) nanoz__zlib_add(0, 1);

  for (i = 0; i < nanoz__ZHASH; ++i) (void)nanoz__sbfree(hash_table[i]);
  NANOZ_FREE(hash_table);

  // store uncompressed instead if compression was worse
  // (buffer capacity is sufficient: the compressed stream we are replacing
  // was larger than the stored encoding we write below)
  if (nanoz__sbn(out) > data_len + 2 + ((data_len + 32766) / 32767) * 5) {
    nanoz__sbn(out) = 2;  // truncate to DEFLATE 32K window and FLEVEL = 1
    for (j = 0; j < data_len;) {
      int blocklen = data_len - j;
      if (blocklen > 32767) blocklen = 32767;
      nanoz__sbpush(
          out,
          data_len - j == blocklen);  // BFINAL = ?, BTYPE = 0 -- no compression
      nanoz__sbpush(out, NANOZ_UCHAR(blocklen));  // LEN
      nanoz__sbpush(out, NANOZ_UCHAR(blocklen >> 8));
      nanoz__sbpush(out, NANOZ_UCHAR(~blocklen));  // NLEN
      nanoz__sbpush(out, NANOZ_UCHAR(~blocklen >> 8));
      memcpy(out + nanoz__sbn(out), data + j, blocklen);
      nanoz__sbn(out) += blocklen;
      j += blocklen;
    }
  }

  {
    // compute adler32 on input
    unsigned int s1 = 1, s2 = 0;
    int blocklen = (int)(data_len % 5552);
    j = 0;
    while (j < data_len) {
      for (i = 0; i < blocklen; ++i) {
        s1 += data[j + i];
        s2 += s1;
      }
      s1 %= 65521;
      s2 %= 65521;
      j += blocklen;
      blocklen = 5552;
    }
    nanoz__sbpush(out, NANOZ_UCHAR(s2 >> 8));
    nanoz__sbpush(out, NANOZ_UCHAR(s2));
    nanoz__sbpush(out, NANOZ_UCHAR(s1 >> 8));
    nanoz__sbpush(out, NANOZ_UCHAR(s1));
  }
  *out_len = nanoz__sbn(out);
  // make returned pointer freeable: slide the payload over the two
  // bookkeeping ints so the caller can free() the returned pointer directly
  NANOZ_MEMMOVE(nanoz__sbraw(out), out, *out_len);
  return (unsigned char *)nanoz__sbraw(out);
}

// from zlib
uint64_t nanoz_compressBound(uint64_t sourceLen) {
  // TODO: Overflow check?
  return sourceLen + (sourceLen >> 12ull) + (sourceLen >> 14ull) +
         (sourceLen >> 25ull) + 13ull;
}

#endif  // NANOZLIB_IMPLEMENTATION

#endif /* NANOZLIB_H_ */