저는 EUROSTAT의 데이터를 자주 사용하는데, 그 데이터를 R로 불러오는 일이 꽤 번거롭습니다. EUROSTAT에서 R로 데이터를 가져오는 좋은 방법이 있습니까?
EUROSTAT의 일괄 다운로드 기능(http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&dir=dic%2Fen)이 제공하는 모든 데이터 세트를 얻기 위해 아래 코드 조각을 작성했습니다. 이 데이터는 R로 곧바로 불러올 수 없다는 것을 알게 되었습니다. 더 좋은 방법이 있을까요? 저에게는 다음 방법이 통했습니다:
#this library is used to download data from eurostat and to find datasets
#later: extend to find datasets with certain dimensions
#download data from eurostat
#unpack and convert to dataframe
#load label descriptions
#load factors
#save as r data object
#code of the dataset to fetch; used below to build the download file name
datasetname <- "ebd_all"
#language code inserted into the dictionary (label) file URLs
LANGUAGE <- "en"
#install RCurl only when it is missing: an unconditional install.packages()
#re-downloads the package on every run and fails in non-interactive sessions
#that have no CRAN mirror configured
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl")
}
library(RCurl)
library(data.table)
library(reshape)
library(stringr)
#bulk-download endpoint; the dataset file name is appended to it
baseurl <- "http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&file=data%2F"
fullfilename <- paste0(datasetname, ".tsv.gz")
#fetch the compressed table into a temporary file and read it
temp <- paste0(tempfile(), ".gz")
download.file(paste0(baseurl, fullfilename), temp)
dataconnection <- gzfile(temp)
d <- read.delim(dataconnection)
#the first column carries the combined dimension key; its header is expected
#to be the dot-separated dimension names followed by ".time"
firstname <- colnames(d)[1]
#reshape to long format: one row per (dimension key, remaining column) pair
longdata <- melt(d, id.vars = firstname)
names(longdata) <- c("dimensions", "time", "value")
#drop the trailing ".time" and split what is left into upper-case headings
firstname <- substr(firstname, 1, nchar(firstname) - nchar(".time"))
headings <- toupper(unlist(strsplit(firstname, ".", fixed = TRUE)))
headingcount <- length(headings)
#split the comma-separated dimension key into one column per heading
df <- data.frame(dimensions = as.character(longdata[["dimensions"]]))
#store the split result as a nested data frame in the "dimensions" column
df$dimensions <- colsplit(df$dimensions, split = "\\,", names = headings)
dimensions <- data.table(df$dimensions)
#fetch the dictionary that maps dimension codes to descriptions
dimfile <- paste0(
  "http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&file=dic%2F",
  LANGUAGE,
  "%2Fdimlst.dic"
)
temp <- paste0(tempfile(), ".gz")
download.file(dimfile, temp)
dataconnection <- gzfile(temp)
dimdata <- read.delim(dataconnection, header = FALSE)
colnames(dimdata) <- c("colname", "desc")
#lookup vector: description values named by their dimension code
lab <- setNames(dimdata$desc, dimdata$colname)
#self-explanatory column titles, in the same order as the headings
speakingheadings <- as.character(lab[headings])
#download the code list of every heading and attach its descriptions as an
#extra "<HEADING>_desc" column
for (heading in headings) {
  factorfile <- paste0(
    "http://epp.eurostat.ec.europa.eu/NavTree_prod/everybody/BulkDownloadListing?sort=1&file=dic%2F",
    LANGUAGE, "%2F", tolower(heading), ".dic"
  )
  temp <- paste0(tempfile(), ".gz")
  download.file(factorfile, temp)
  dataconnection <- gzfile(temp)
  factordata <- read.delim(dataconnection, header = FALSE)
  #the file has been read completely; do not leave it behind in tempdir()
  unlink(temp)
  desc_col <- paste0(heading, "_desc")
  colnames(factordata) <- c(heading, desc_col)
  #look the descriptions up by position instead of using merge(): merge() on
  #a data.table sorts the rows by the key and moves the key column to the
  #front, so the rows would no longer line up with longdata$time / the values
  #that are cbind()-ed on later, and the column order would be scrambled
  label_rows <- match(
    as.character(dimensions[[heading]]),
    as.character(factordata[[heading]])
  )
  #codes without a dictionary entry get NA, like merge(all.x = TRUE) did
  dimensions[, (desc_col) := factordata[[desc_col]][label_rows]]
}
#finally give the code columns their speaking headings; rename by name so the
#result does not depend on column positions
setnames(dimensions, headings, speakingheadings)
#split every raw cell into its value and an optional flag (separated by a
#space), then combine everything into the final table
values <- data.table(
  colsplit(as.character(longdata$value), split = "\\ ", names = c("value", "flag"))
)
#colsplit may have converted the columns; compare them as plain text
values[, value := as.character(value)]
values[, flag := as.character(flag)]
#a flag identical to the value means the cell carried no flag of its own
values[value == flag, flag := NA_character_]
#non-numeric entries become NA here (R emits a coercion warning)
values[, value := as.double(value)]
eurostatdata <- cbind(dimensions, time = longdata$time, values)
#paste0: plain paste() inserts a space and would write "<name> .RData"
save(eurostatdata, file = paste0(datasetname, ".RData"))
[eurostat](https://cran.r-project.org/web/packages/eurostat/index.html)라는 R 패키지가 2015년에 출시되었습니다. 훌륭한 [vignette](https://github.com/rOpenGov/eurostat/blob/master/vignettes/2015-RJournal/lahti-huovari-kainu-biecek.md)에서 eurostat 데이터를 다운로드하고 시각화하는 예제와 설명을 제공합니다. 아래에 언급된 [SmarterPoland](https://cran.r-project.org/web/packages/SmarterPoland/index.html) 패키지의 저자는 eurostat 패키지의 공동 저자 중 한 명입니다.