% Journal article on model-based chart recognition and data extraction.
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
% NOTE(review): no page range or DOI was recorded for this entry; add them if known.
@article{742,
  author   = {Mishchenko, Ales and Vassilieva, Natalia},
  title    = {Model-Based Recognition and Extraction of Information from Chart Images},
  journal  = {Journal of Multimedia Processing and Technologies},
  year     = {2011},
  volume   = {2},
  number   = {2},
  url      = {http://www.dline.info/jmpt/fulltext/v2n2/2.pdf},
  abstract = {Charts are widely used in technical and business documents as a graphical representation of numerical and qualitative data. We present a model-based method to automatically extract data carried by charts and convert them to XML format, thus making these data available for indexing, querying and analysis by common methods of textual data management. The proposed method includes several steps: 1) chart detection, 2) model-based classification by chart type and extraction of graphical components, 3) detection and recognition of textual components, 4) extraction of semantic relations between graphics and text. For testing purpose, a benchmark set was created with the XML/SWF Chart tool. By comparing the recovered data and the original data used for chart generation, we are able to evaluate our information extraction algorithm and confirm its validity. We also extensively tested each step of the proposed algorithm against existing approaches. Model-based classification showed high accuracy, comparable to those of the best supervised learning methods. The proposed text detection algorithm leads to significant improvement in text recognition rate (up to 20 times better).},
}