@article{4513,
  author   = {Chis, Tiberiu S. and Harrison, Peter G.},
  title    = {Incremental Hidden {Markov} Models for Real-Time Workload Characterization with Improved Backwards Approximations},
  journal  = {Journal of Information Technology Review},
  year     = {2025},
  volume   = {16},
  number   = {3},
  pages    = {106--112},
  doi      = {10.6025/jitr/2025/16/3/106-112},
  url      = {https://www.dline.info/jitr/fulltext/v16n3/jitrv16n3_3.pdf},
  abstract = {The paper introduces an incremental approach to training Hidden Markov Models (HMMs), particularly
aimed at modeling discrete-time workloads such as I/O traces. Traditional HMM training, notably using the
Baum-Welch algorithm, requires the full dataset in advance, which limits its applicability in real-time
applications. To address this, the authors develop IncHMM, a model that updates its parameters incrementally
as new data arrives, making it suitable for dynamic workloads.
Central to their method is creating two approximations of the backwards ($\beta$) variables in the
Forward-Backwards algorithm, which enables the model to process new observations without reprocessing the entire
dataset. The first approximation assumes convergence properties in state probabilities, while the second
employs matrix inversion techniques with fallback strategies when certain conditions are not met. Both are
tested on real-world I/O traces that have been pre-processed using the K-means clustering algorithm.
Simulation results show that the incrementally trained HMMs closely match the statistical properties (mean
and standard deviation) of the raw data, especially for read operations. Write discrepancies are noted and
attributed to lower variance or model limitations. The authors suggest future improvements, including refining
the clustering algorithm and testing it on other time series, such as hospital arrival data, to enhance versatility
and accuracy.},
}