% Conference paper by Kassarnig and Wotawa, published in the proceedings of
% the 2018 IEEE International Conference on Big Data.
% NOTE(review): the address field holds a country rather than the publisher's
% city; confirm the intended location before relying on it.
@inproceedings{99df64c905cf48d9a8cc492d91ae2f9d,
  author    = {Kassarnig, Valentin and Wotawa, Franz},
  title     = {An Approach to Automatically Extract Predictive Properties from Nominal Attributes in Relational Databases},
  booktitle = {Proceedings - 2018 {IEEE} International Conference on Big Data, Big Data 2018},
  editor    = {Song, Yang and Liu, Bing and Lee, Kisung and Abe, Naoki and Pu, Calton and Qiao, Mu and Ahmed, Nesreen and Kossmann, Donald and Saltz, Jeffrey and Tang, Jiliang and He, Jingrui and Liu, Huan and Hu, Xiaohua},
  publisher = {IEEE},
  address   = {United States},
  pages     = {4932--4939},
  year      = {2019},
  month     = jan,
  day       = {22},
  doi       = {10.1109/BigData.2018.8622359},
  language  = {English},
  keywords  = {Aggregation, Automated feature engineering, Nominal data, Propositionalization, Relational data mining},
  abstract  = {Feature engineering is a fundamental step in data mining and yet it is both difficult and expensive. Hand-crafting features is not only a time-consuming task that requires specific domain knowledge, it also may prevent new information to emerge. The extraction of meaningful features from relational data is particularly difficult due to complex relationships between tables. In the last decade there is an emerging trend towards automating the process of constructing propositional features from relational data and such approaches have been successfully used for solving numerous real-world problems. Despite their success, most of them lack an adequate support of nominal attributes. We present a new approach helping propositionalization methods to extract meaningful features from nominal attributes and improve their predictive performance. In an experimental evaluation on three datasets we demonstrate that the proposed technique is capable of producing novel features that are highly correlated with the target attribute. Furthermore, those features can reveal relationships among the distinct categorical values allowing to compare and order them. Finally, experimental results show that those new features can significantly improve the predictive performance in classification tasks.},
  note      = {2018 {IEEE} International Conference on Big Data, Big Data 2018; Conference date: 10--13 December 2018},
}