% Journal article: Vilares, Alonso and Gomez-Rodriguez (2017),
% "Supervised sentiment analysis in multilingual environments",
% Information Processing and Management 53(3), pp. 595-607.
% Cleaned from a publisher export: bare DOI, en-dash page range,
% a single merged keywords field, and accents written as BibTeX
% special characters so that sorting and labels work.
@article{VilAloGomIPM2017,
  author   = {Vilares, David and Alonso, Miguel A. and G{\'o}mez-Rodr{\'\i}guez, Carlos},
  title    = {Supervised sentiment analysis in multilingual environments},
  journal  = {Information Processing \& Management},
  volume   = {53},
  number   = {3},
  pages    = {595--607},
  year     = {2017},
  issn     = {0306-4573},
  doi      = {10.1016/j.ipm.2017.01.004},
  url      = {http://www.sciencedirect.com/science/article/pii/S0306457316302540},
  keywords = {Sentiment analysis, Multilingual, Code-Switching},
  abstract = {This article tackles the problem of performing multilingual polarity classification on Twitter, comparing three techniques: (1) a multilingual model trained on a multilingual dataset, obtained by fusing existing monolingual resources, that does not need any language recognition step, (2) a dual monolingual model with perfect language detection on monolingual texts and (3) a monolingual model that acts based on the decision provided by a language identification tool. The techniques were evaluated on monolingual, synthetic multilingual and code-switching corpora of English and Spanish tweets. In the latter case we introduce the first code-switching Twitter corpus with sentiment labels. The samples are labelled according to two well-known criteria used for this purpose: the SentiStrength scale and a trinary scale (positive, neutral and negative categories). The experimental results show the robustness of the multilingual approach (1) and also that it outperforms the monolingual models on some monolingual datasets.},
}
