@article{4691,
  author   = {Rodr{\'i}guez Jorge, Ricardo},
  title    = {Dataset-Level Entropy Characterization and Complexity-Aware Benchmarking for Learned Residual Coding},
  journal  = {Electronic Devices},
  year     = {2026},
  volume   = {15},
  number   = {1},
  pages    = {20--32},
  doi      = {10.6025/ed/2026/15/1/20-32},
  url      = {https://www.dline.info/ed/fulltext/v15n1/edv15n1_2.pdf},
  abstract = {This study introduces a comprehensive entropy characterization and complexity-aware benchmarking framework for residual data generated in learned image and video compression pipelines. Analyzing a large corpus of raw, content-agnostic residual symbols at byte-level granularity, we uncover critical statistical properties that challenge conventional entropy modeling assumptions. Empirical analysis reveals substantial heterogeneity across residuals, with entropy values spanning 3.5--5.0 bits per symbol (mean $\pm$1 bit standard deviation)---significantly below the theoretical maximum of 8 bits---indicating rich statistical redundancy exploitable for compression. Symbol distributions exhibit pronounced heavy-tailed characteristics, with only 10--15 dominant symbols accounting for 50--70\% of occurrences, invalidating Gaussian or Laplacian assumptions pervasive in traditional codecs. Bit-plane analysis further demonstrates extreme sparsity (95--98\%) in higher-order planes, confirming residuals concentrate tightly around zero---a signature of effective prediction. Crucially, entropy exhibits strong non-stationarity across content instances, naturally stratifying into low, medium, and high complexity regimes. Rate--distortion--complexity (RDC) evaluation across hyperprior, autoregressive, and transformer-based entropy models quantifies fundamental trade-offs: while transformer architectures achieve up to 1.36 dB gains over hyperpriors at low bitrates, they impose $>12\times$ computational overhead, revealing diminishing returns beyond moderate complexity budgets. The autoregressive approach delivers optimal balance (0.78 dB gain at $5.2\times$ complexity). Our dataset and analysis framework provide a content-agnostic benchmark for evaluating entropy modeling efficacy independent of codec-specific preprocessing, establishing that adaptive, complexity-aware strategies---not static global models---are essential for efficient learned compression deployment across diverse hardware constraints.},
}