% Ilic, Pock and Wildes: conference paper in the ECCV 2022 proceedings,
% published in the Lecture Notes in Computer Science series.
% `volume` holds the LNCS series number (not the proceedings part); the part
% number from the original export (4) is kept in `note`.
% `copyright` is not read by the standard styles; it only preserves the
% publisher statement from the original export.
% NOTE(review): the LNCS number 13664 and the publisher city (Cham) are taken
% from the Springer catalogue, not from the original export -- confirm both
% against the DOI landing page.
@inproceedings{9b16b4a5091d41208d85cf5a3c4dae3b,
  author    = {Ilic, Filip and Pock, Thomas and Wildes, Richard P.},
  title     = {Is Appearance Free Action Recognition Possible?},
  booktitle = {Computer Vision -- {ECCV} 2022},
  editor    = {Avidan, Shai and Brostow, Gabriel and Ciss{\'e}, Moustapha and Farinella, Giovanni Maria and Hassner, Tal},
  series    = {Lecture Notes in Computer Science},
  volume    = {13664},
  pages     = {156--173},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2022},
  doi       = {10.1007/978-3-031-19772-7_10},
  isbn      = {9783031197710},
  language  = {English},
  note      = {Proceedings, Part IV. European Conference on Computer Vision (ECCV 2022), 23--27 October 2022},
  copyright = {{\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG},
  keywords  = {Action recognition, Action recognition dataset, Deep learning, Human motion perception, Static and dynamic video representation},
  abstract  = {Intuition might suggest that motion and dynamic information are key to video-based action recognition. In contrast, there is evidence that state-of-the-art deep-learning video understanding architectures are biased toward static information available in single frames. Presently, a methodology and corresponding dataset to isolate the effects of dynamic information in video are missing. Their absence makes it difficult to understand how well contemporary architectures capitalize on dynamic vs. static information. We respond with a novel Appearance Free Dataset (AFD) for action recognition. AFD is devoid of static information relevant to action recognition in a single frame. Modeling of the dynamics is necessary for solving the task, as the action is only apparent through consideration of the temporal dimension. We evaluated 11 contemporary action recognition architectures on AFD as well as its related RGB video. Our results show a notable decrease in performance for all architectures on AFD compared to RGB. We also conducted a complimentary study with humans that shows their recognition accuracy on AFD and RGB is very similar and much better than the evaluated architectures on AFD. Our results motivate a novel architecture that revives explicit recovery of optical flow, within a contemporary design for best performance on AFD and RGB.},
}