r - Document Term Matrix throwing error: Error in simple_triplet_matrix -
i creating word cloud based on tweets various different sports teams. code executes 1 in 10 times:
library(tm) library(snowballc) twt.mumbai <- searchtwitter('mumbai',n=50, lang = "en") twt.london <- searchtwitter('london',n=50, lang = "en") save(list="twt.mumbai", file="mumbai.rdata") save(list="twt.london", file="london.rdata") load(file = "mumbai.rdata") load(file = "london.rdata") tweets.mumbai <- lapply(twt.mumbai, function(t) {t$gettext()}) tweets.london <- lapply(twt.london, function(t) {t$gettext()}) data.sourcem <- vectorsource(tweets.mumbai) data.sourcel <- vectorsource(tweets.london) data.corpusm <- corpus(data.sourcem) data.corpusl <- corpus(data.sourcel) #preprocessing #removepunctuation data.corpusm <- tm_map(data.corpusm, content_transformer(removepunctuation), lazy = true) data.corpusl <- tm_map(data.corpusl, content_transformer(removepunctuation), lazy = true) #urlremove removeurl <- function(x) gsub("http[^[:space:]]*", "", x) data.corpusm <- tm_map(data.corpusm, content_transformer(removeurl), lazy = true) data.corpusl <- tm_map(data.corpusl, content_transformer(removeurl), lazy = true) #removenumber data.corpusm <- tm_map(data.corpusm, removenumbers, lazy = true) data.corpusl <- tm_map(data.corpusl, removenumbers, lazy = true) #extrawhitespace data.corpusm <- tm_map(data.corpusm, stripwhitespace, lazy = true) data.corpusl <- tm_map(data.corpusl, stripwhitespace, lazy = true) #stem data.corpusm <- tm_map(data.corpusm, content_transformer(stemdocument), lazy = true) data.corpusl <- tm_map(data.corpusl, content_transformer(stemdocument), lazy = true) inspect(data.corpusm[1:2]) inspect(data.corpusl[1:2]) tdm.mumbai <- documenttermmatrix(data.corpusm, control = list(stemdocument=true)) tdm.london <- documenttermmatrix(data.corpusl, control = list(stemdocument=true))
the document term matrix throwing following error: error in simple_triplet_matrix(i = i, j = j, v = as.numeric(v), nrow = length(allterms), : 'i, j, v' different lengths in addition: warning messages: 1: in mclapply(unname(content(x)), termfreq, control) : scheduled cores encountered errors in user code 2: in simple_triplet_matrix(i = i, j = j, v = as.numeric(v), nrow = length(allterms), : nas introduced coercion
Comments
Post a Comment