% Entry 4768: journal article in Journal of Information Technology Review 17(3), 2026.
% The DOI is stored bare (no resolver prefix). The page range is taken from the DOI
% suffix and the surname is assumed to be the compound "Rodríguez Jorge" -- verify
% both against the published PDF.
@article{4768,
  author   = {Rodríguez Jorge, Ricardo},
  title    = {Probability Concentration and Rare-Event Characterization in High-Dimensional Correlated Multivariate {Bernoulli} Systems},
  journal  = {Journal of Information Technology Review},
  year     = {2026},
  volume   = {17},
  number   = {3},
  pages    = {120--147},
  doi      = {10.6025/jitr/2026/17/3/120-147},
  url      = {https://www.dline.info/jitr/fulltext/v17n3/jitrv17n3_2.pdf},
  abstract = {High-dimensional correlated multivariate Bernoulli systems exhibit exponentially large state spaces, making the characterization of rare events and probability concentration a significant computational challenge. This study presents a comprehensive analytical framework to investigate sparsity, dominance, and tail behavior within such discrete binary systems. Utilizing a benchmark dataset of 20 correlated Bernoulli variables, we systematically analyze state spaces across increasing dimensionalities (4 to 20 bits) through tail probability evaluation, Herfindahl--Hirschman Index (HHI) concentration metrics, activation pattern characterization, and Generalized Extreme Value (GEV) modeling. Results reveal extreme sparsity, with over 99.9\% of configurations at the highest dimension possessing probabilities below $10^{-4}$. Despite a theoretical state space of 1,048,576 configurations, probability mass is heavily concentrated: a single dominant state captures approximately 20\% of the total mass, and the top 100 states account for over 50\%. Consequently, the effective number of states remains remarkably low (approximately 23.51), indicating that practical system complexity is vastly smaller than its combinatorial size. Furthermore, GEV modeling confirms that extreme configurations follow a predictable, heavy-tailed statistical regime rather than random fluctuations. These findings demonstrate that high-dimensional correlated binary systems are governed by a small subset of highly activated configurations. This insight enables efficient, reduced state representations crucial for advancing reliability analysis, anomaly detection, and risk assessment in complex stochastic networks.},
}