% Conference paper from the INTERSPEECH 2010 proceedings (pages 2342--2345).
% The citation key looks machine-generated; it is kept unchanged so that
% existing \cite commands continue to resolve.
% The former "series" field was removed: it repeated "booktitle" verbatim and
% would make styles that print the series show the proceedings title twice.
@inproceedings{a13c0f87dbfd4c6599595ea6b940220d,
  author    = {Yamakawa, Nobuhide and Kitahara, Tetsuro and Takahashi, Toru and Komatani, Kazunori and Ogata, Tetsuya and Okuno, Hiroshi G.},
  title     = {Effects of modelling within- and between-frame temporal variations in power spectra on non-verbal sound recognition},
  booktitle = {Proceedings of the 11th Annual Conference of the International Speech Communication Association, {INTERSPEECH} 2010},
  publisher = {International Speech Communication Association},
  year      = {2010},
  pages     = {2342--2345},
  language  = {English},
  keywords  = {Audio signal classification, Environmental sound recognition, Matching-pursuit, Non-speech sound recognition, Time-frequency analysis},
  abstract  = {Research on environmental sound recognition has not shown great development in comparison with that on speech and musical signals. One of the reasons is that the sound category of environmental sounds covers a broad range of acoustical natures. We classified them in order to explore suitable recognition techniques for each characteristic. We focus on impulsive sounds and their non-stationary feature within and between analytic frames. We used matching-pursuit as a framework to use wavelet analysis for extracting temporal variation of audio features inside a frame. We also investigated the validity of modeling decaying patterns of sounds using hidden Markov models. Experimental results indicate that sounds with multiple impulsive signals are recognized better by using time-frequency analyzing bases than by frequency domain analysis. Classification of sound classes with a long and clear decaying pattern improves when HMMs with multiple number of hidden states are applied.},
}