% Journal article record. The doi field stores the bare identifier (no resolver
% prefix) so that bibliography styles can build the link themselves.
% NOTE(review): the page range is taken from the DOI suffix (51-79); confirm
% against the published PDF.
@article{4755,
  author   = {Nguyen Minh Tuan},
  title    = {Optimizing High-Dimensional Data Analysis Through Latent Representation Learning, Contrastive Embedding, and Consensus Clustering for Sensor Intelligence},
  journal  = {Digital Signal Processing and Artificial Intelligence for Automatic Learning},
  year     = {2026},
  volume   = {5},
  number   = {2},
  pages    = {51--79},
  doi      = {10.6025/dspaial/2026/5/2/51-79},
  url      = {https://www.dline.info/dspai/fulltext/v5n2/dspaiv5n2_1.pdf},
  abstract = {High-dimensional sensor data present substantial analytical challenges due to feature sparsity, nonlinear dependencies, noisy observations, and unstable clustering behavior in conventional machine learning frameworks. Although deep representation learning and subspace clustering methods have significantly advanced high-dimensional analytics, many existing approaches remain fragmented, operating independently across latent representation learning, semantic embedding optimization, and clustering stages. This study proposes a unified multi-stage unsupervised sensor intelligence framework integrating Variational Autoencoder (VAE)-based probabilistic latent representation learning, contrastive representation optimization using Triplet Networks, SimCLR, and BYOL, and consensus hybrid clustering for robust subgroup discovery in high-dimensional digital sensor signals. The proposed framework is evaluated using the IEEE DataPort binary-classification digital sensor signal dataset comprising 11 numerical sensor features characterized by temporal dependencies and event-driven activation patterns. Experimental evaluation incorporates quantitative clustering metrics including Silhouette Score, Davies–Bouldin Index, Calinski–Harabasz Index, reconstruction analysis, cluster stability evaluation, comparative baselines, and consensus clustering validation.
Results demonstrate that probabilistic latent manifold learning substantially improves structural organization, semantic continuity, and anomaly-sensitive clustering compared with conventional dimensionality reduction and raw-feature clustering approaches. Hierarchical clustering provides balanced subgroup segmentation, whereas spectral clustering exhibits strong sensitivity toward sparse anomalous structures. The proposed consensus hybrid clustering mechanism combines these complementary strengths to produce stable and semantically coherent latent partitions. The study contributes a robust and scalable framework for intelligent sensing, anomaly detection, cyber-physical monitoring, and unsupervised IoT analytics.},
}