@article{4690,
  author        = {Chung, Yao-Liang},
  title         = {Edge {AI} Chip Architecture: A Hierarchical Design Framework for Energy-Efficient On-Device Intelligence},
  journal       = {Electronic Devices},
  year          = {2026},
  volume        = {15},
  number        = {1},
  doi           = {10.6025/ed/2026/15/1/1-19},
  url           = {https://www.dline.info/ed/fulltext/v15n1/edv15n1_1.pdf},
  abstract      = {Edge artificial intelligence (AI) chips represent a transformative class of semiconductor devices engineered to execute AI workloads directly on endpoint devices, eliminating persistent cloud dependency while satisfying stringent constraints on power consumption, latency, and data privacy. This paper presents a comprehensive hierarchical architecture framework comprising four interdependent layers compute fabric, memory subsystem, interconnect network, and system integration that collectively address the multidimensional optimization challenges inherent in on-device intelligence. We demonstrate that edge AI architectures fundamentally diverge from conventional processors through specialized neural processing units (NPUs), near data processing memory hierarchies, and milliwatt to watt power profiles enabling always on operation. Critically, performance requirements span three orders of magnitude across deployment domains: automotive systems demand  10 TOPS within ~10 W envelopes for real time inference (<30 ms), smartphones operate under  1 W thermal constraints, and always on wearables target sub-100 mW operation with maximal data compression mandating application specific architectural specialization rather than monolithic designs. A foundational insight driving innovation is recognition that data movement, not computation, constitutes the dominant energy bottleneck; consequently, leading architectures implement compression aware memory subsystems that reduce bandwidth requirements by 5–30× while simultaneously increasing throughput and reducing power consumption by 30–50%. Furthermore, neuromorphic co-design principles inspired by biological neural systems, encompassing synaptic plasticity circuits, spike-based asynchronous computation, and in memory crossbar arrays, enable computational efficiencies approaching the brain's remarkable 20 W energy envelope. We establish that edge AI does not supplant cloud infrastructure but rather forms a symbiotic continuum in which edge devices resolve >95% of inference requests locally, while cloud resources handle model training and federated learning aggregation. This holistic co-design philosophy, spanning algorithms, circuits, and systems, positions edge AI architectures as foundational enablers of scalable, responsive, and privacy aware intelligent systems for the next generation of cyberphysical applications.},
  internal-note = {NOTE(review): abstract appears to have lost comparison/approximation symbols during text extraction ("demand  10 TOPS", "under  1 W" -- double spaces suggest dropped glyphs); also "four interdependent layers compute fabric ... system integration that" likely lost em dashes around the layer list. Verify against the publisher PDF before normalizing. Citation key "4690" kept as-is to avoid breaking existing \cite references.},
}