% Journal article entry. No DOI is recorded for this item; the url field links to the full-text PDF.
% Authors are stored in "Last, First" form and separated by " and " so BibTeX parses six distinct names.
@article{1388,
  author   = {Qureshi, Saad Ahmed and Rehman, Ammar Saleem and Qamar, Ali Mustafa and Kamal, Aatif and Mumtaz, Summaya and Javed, Khurram},
  title    = {Comparative Analysis of Machine Learning Techniques for Telecommunication Subscribers' Churn Prediction},
  journal  = {Journal of Intelligent Computing},
  year     = {2013},
  volume   = {4},
  number   = {4},
  url      = {http://www.dline.info/jic/fulltext/v4n4/2.pdf},
  abstract = {During the last two decades, the mobile communication has become a dominant medium of communication. In numerous countries, especially the developed ones, the market is saturated to the extent that each new customer must be won over from the competitors. Advancements in technology and rapid improvements in telecom industry have provided customers with many choices. Customer retention is one of the major tasks for the telecom industry. On the other hand, public policies and standardization of mobile communication now allow customers to easily switch over from one carrier to another, resulting in a highly fluid market. Churn refers to customers who will leave or turn to other service providers. Acquiring new customers is much more expensive as compared to retaining existing customers. Therefore, it is far more cost-effective for service providers to predict customers who will churn in future and customize services or packages according to the customer's demands. As a result, churn prediction has emerged as one of the most crucial Business Intelligence (BI) applications that aim at identifying customers who are about to transfer to a competitor. In this paper, we present commonly used data mining techniques for the identification of customers who are about to churn. Based on historical data, these methods try to find patterns which can identify possible churners. Some of the well-known algorithms used during this research are Regression analysis, Decision Trees and Artificial Neural Networks (ANNs).
The data set used in this study was obtained from Customer DNA website. It contains traffic data of 106,000 customers and their usage behavior for 3 months. The data set comprises of 48 variables. Spearman's correlation coefficient is used to select the variables of high impact. In order to solve the problem of class imbalance in the data set, re-sampling is used. The results show that the decision trees is the most accurate classifier algorithm while identifying potential churners.},
}