% Journal article: incremental HMM training (IncHMM) for I/O workload traces; JITR 16(3), 2025.
@article{4513,
  author   = {Chis, Tiberiu S. and Harrison, Peter G.},
  title    = {Incremental Hidden {Markov} Models for Real-Time Workload Characterization with Improved Backwards Approximations},
  journal  = {Journal of Information Technology Review},
  year     = {2025},
  volume   = {16},
  number   = {3},
  pages    = {106--112},
  doi      = {10.6025/jitr/2025/16/3/106-112},
  url      = {https://www.dline.info/jitr/fulltext/v16n3/jitrv16n3_3.pdf},
  abstract = {The paper introduces an incremental approach to training Hidden Markov Models (HMMs), particularly aimed at modeling discrete-time workloads such as I/O traces. Traditional HMM training, notably using the Baum-Welch algorithm, requires the full dataset in advance, which limits its applicability in real-time applications. To address this, the authors develop IncHMM, a model that updates its parameters incrementally as new data arrives, making it suitable for dynamic workloads. Central to their method is creating two approximations of the backwards ($\beta$) variables in the Forward-Backwards algorithm, which enables the model to process new observations without reprocessing the entire dataset. The first approximation assumes convergence properties in state probabilities, while the second employs matrix inversion techniques with fallback strategies when certain conditions are not met. Both are tested on real-world I/O traces that have been pre-processed using the K-means clustering algorithm. Simulation results show that the incrementally trained HMMs closely match the statistical properties (mean and standard deviation) of the raw data, especially for read operations. Write discrepancies are noted and attributed to lower variance or model limitations. The authors suggest future improvements, including refining the clustering algorithm and testing it on other time series, such as hospital arrival data, to enhance versatility and accuracy.},
}