2016-12-01 2 views
0

저는 R에서 wordcloud로 작업하고 있습니다. 지금까지는 기본적인 작업만 성공했지만, 특정 위치별로 워드 클라우드를 보여 주고 싶습니다. 예를 들어 아래와 같은 텍스트가 있을 때, 위치에 "USA"가 포함된 행들에 대해 별도의 워드 클라우드를 만들고 싶습니다.

다른 열을 기반으로 하는 R 열의 Wordcloud

     TEXT                 LOCATION 
    True or false? link(#Addition, #Classification)           NewYork,USA 
    Gene deFuser: detecting gene fusion events from protein sequences #bmC#bioinformatics Norwich,UK 
    Biologists do have a sense of humor, especially computational bio people     France 
    Semantic Inference using #Chemogenomics Data for Drug Discovery       London,UK 

위와 같은 텍스트에 대해 저는 현재 다음 코드를 사용하고 있습니다.

library(tm) 
library(SnowballC) 
library(wordcloud) 

# Example data: one text per row plus the location it was posted from.
DATA <- c(
  "True or false? link(#Addition, #Classification) ",
  "Gene deFuser: detecting gene fusion events from protein sequences #bmC#bioinformatics",
  " Biologists do have a sense of humor, especially computational bio people",
  "Semantic Inference using #Chemogenomics Data for Drug Discovery"
)
Location <- c("NewYork,USA", "Norwich,UK", " France", "London,UK")

# stringsAsFactors = FALSE keeps both columns as character on R < 4.0.
jeopQ <- data.frame(DATA, Location, stringsAsFactors = FALSE)

# Build the corpus and normalise the text.
# NOTE: the former tm_map(jeopCorpus, PlainTextDocument) step was removed.
# tolower is already wrapped in content_transformer(), so the conversion is
# unnecessary, and on current tm versions it collapses the corpus
# ("transformation drops documents").
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords("english"))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)

# wordLengths replaces the obsolete minWordLength option, which current tm
# ignores (falling back to a minimum length of 3 characters).
myDTM <- TermDocumentMatrix(
  jeopCorpus,
  control = list(wordLengths = c(1, Inf))
)

m <- as.matrix(myDTM)

# Total frequency of every term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)

# Fixed seed so the word layout is reproducible between runs.
set.seed(4363)
# min.freq = 3 hides terms seen fewer than three times; lower it when
# experimenting with a very small sample such as the one above.
wordcloud(
  names(v), v,
  max.words = 100,
  min.freq = 3,
  scale = c(4, 0.1),
  random.order = FALSE,
  rot.per = .5,
  vfont = c("sans serif", "plain"),
  colors = palette()
)

위의 기본 워드 클라우드 코드를 사용하고 있는데, 위치에 "UK"가 포함된 경우와 프랑스에 대해서도 각각 별도의 워드 클라우드를 만들 수 있습니까?

답변

0
# One word cloud per country, using the DATA and Location vectors from the
# question. stringsAsFactors = FALSE keeps the columns as character on R < 4.0.
jeopQ <- data.frame(DATA, Location, stringsAsFactors = FALSE)

# Clean Location: keep only the part after the last comma (the country) and
# strip surrounding whitespace so that e.g. " France" and "France" group
# together.
jeopQ$Location <- trimws(sub('.*,\\s*','', jeopQ$Location))

# Loop over the distinct countries and draw a separate cloud for each.
for (loc in unique(jeopQ$Location)) {
  # Restrict the corpus to the texts that belong to the current country.
  jeopCorpus <- Corpus(VectorSource(jeopQ$DATA[jeopQ$Location == loc]))
  jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))

  # NOTE: the former tm_map(jeopCorpus, PlainTextDocument) step was removed.
  # tolower is already wrapped in content_transformer(), so the conversion is
  # unnecessary, and on current tm versions it collapses the corpus
  # ("transformation drops documents").
  jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
  jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
  jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords("english"))
  jeopCorpus <- tm_map(jeopCorpus, stemDocument)

  # wordLengths replaces the obsolete minWordLength option, which current tm
  # ignores (falling back to a minimum length of 3 characters).
  myDTM <- TermDocumentMatrix(
    jeopCorpus,
    control = list(wordLengths = c(1, Inf))
  )

  m <- as.matrix(myDTM)

  # Total frequency of every term within this country, most frequent first.
  v <- sort(rowSums(m), decreasing = TRUE)

  # Re-seed on every iteration so each cloud's layout is reproducible.
  set.seed(4363)
  # min.freq = 3 hides terms seen fewer than three times; lower it when a
  # country has only a handful of texts.
  wordcloud(
    names(v), v,
    max.words = 100,
    min.freq = 3,
    scale = c(4, 0.1),
    random.order = FALSE,
    rot.per = .5,
    vfont = c("sans serif", "plain"),
    colors = palette()
  )
}

enter image description here enter image description hereenter image description here

+1

완벽합니다! 고맙습니다! – hyeri

+0

@hyeri 도움이 되어 기쁩니다. –

관련 문제