@inproceedings{c8597e7082aa4dd6a7937969847c7329,
title = "Crowdsourcing for evaluating machine translation quality",
abstract = "The recent popularity of machine translation has increased the demand for the evaluation of translations. However, the traditional evaluation approach, manual checking by a bilingual professional, is too expensive and too slow. In this study, we confirm the feasibility of crowdsourcing by analyzing the accuracy of crowdsourcing translation evaluations. We compare crowdsourcing scores to professional scores with regard to three metrics: translation-score, sentence-score, and system-score. A Chinese to English translation evaluation task was designed using around the NTCIR-9 PATENT parallel corpus with the goal being 5-range evaluations of adequacy and fluency. The experiment shows that the average score of crowdsource workers well matches professional evaluation results. The system-score comparison strongly indicates that crowdsourcing can be used to find the best translation system given the input of 10 source sentence.",
keywords = "Crowdsourcing, Evaluation, Machine translation",
author = "Shinsuke Goto and Donghui Lin and Toru Ishida",
year = "2014",
month = jan,
day = "1",
language = "English",
series = "Proceedings of the 9th International Conference on Language Resources and Evaluation, LREC 2014",
publisher = "European Language Resources Association (ELRA)",
pages = "3456--3463",
editor = "Nicoletta Calzolari and Khalid Choukri and Sara Goggi and Thierry Declerck and Joseph Mariani and Bente Maegaard and Asuncion Moreno and Jan Odijk and Helene Mazo and Stelios Piperidis and Hrafn Loftsson",
booktitle = "Proceedings of the 9th International Conference on Language Resources and Evaluation, LREC 2014",
note = "9th International Conference on Language Resources and Evaluation, LREC 2014 ; Conference date: 26-05-2014 Through 31-05-2014",
}