…의 "레벨"을 분리해 낼 수 있다고들 하지만, 저는 정규식을 사용했을 때 더 나은 결과를 얻었습니다. 모든 상황을 고려하지는 않지만 충분한 유연성을 제공합니다. 다른 분들에게도 유용할 수 있을 것 같아 여기에 공유합니다. 물론 어떠한 보증도 없습니다.
# Parse a string that refers to existing data and describe the structure of
# the data it designates.
#
# Argument:
#   arg.str  A single character string holding an R expression that designates
#            a data frame or an atomic structure (vector/factor), e.g.
#            "iris", "iris$Species", "iris[1:10, 5]" or "iris[['Species']]".
#
# Value:
#   A list containing arg.str (the string as received) and, depending on what
#   could be identified, some of the following elements:
#     rows.subset  text of the row index found in the last pair of brackets
#     col.index    text of the column index found in the last pair of brackets
#     data.struct  "$"-separated components left over once df.name / var.name
#                  have been removed
#     df.name      name of the data frame
#     col.names    column names of the designated data frame
#     var.name     name of the designated variable
#   NULL is returned (after a message) when arg.str cannot be evaluated, or
#   when it evaluates to something that is neither atomic nor a data frame.
#   An error is raised when arg.str is not a single, non-missing string.
#
# Example:
#
# > .parse.arg("iris[1:200,c(1,4)]")
# $arg.str
# [1] "iris[1:200,c(1,4)]"
#
# $rows.subset
# [1] "1:200"
#
# $col.index
# [1] "c(1,4)"
#
# $df.name
# [1] "iris"
#
# $col.names
# [1] "Sepal.Length" "Petal.Width"
.parse.arg <- function(arg.str) {
  # The scalar tests below (if (grepl(...))) only make sense for exactly one
  # non-missing string, so reject anything else up front.
  if (!is.character(arg.str) || length(arg.str) != 1L || is.na(arg.str)) {
    stop("arg.str must be a string")
  }

  # Retrieve the object designated by arg.str; it is needed further down to
  # tell data frames from atomic structures.
  # NOTE(review): eval(parse()) executes arbitrary code contained in arg.str;
  # only call this function with trusted input.
  x <- try(eval(parse(text = arg.str)))
  if (inherits(x, "try-error")) {
    message("arg.str must match an existing object")
    return()
  }
  if (!is.data.frame(x) && !is.atomic(x)) {
    message("arg.str must match an atomic structure (vector/factor) or a dataframe")
    return()
  }

  # Initialise the output list and keep a copy of the string as received
  output <- list()
  output$arg.str <- arg.str

  # Trim leading/trailing blanks
  arg.str <- gsub("^\\s+|\\s+$", "", arg.str)

  # Get rid of spaces next to brackets and next to the comma in indexing
  # brackets. Proceeding this way avoids removing spaces inside quoted
  # structures such as ['var name']
  arg.str <- gsub("\\s*\\[\\s*", "[", arg.str, perl = TRUE) # spaces near [
  arg.str <- gsub("\\s*\\]\\s*", "]", arg.str, perl = TRUE) # spaces near ]
  arg.str <- gsub("^(.*)(\\[\\d+:\\d+)?\\s?,\\s?(.+)$", "\\1\\2,\\3", arg.str, perl = TRUE)

  # Change [[]] to [] for the last pair of brackets; this simplifies the work
  arg.str <- sub("\\[{2}(.*)\\]{2}$", "[\\1]", arg.str, perl = TRUE)

  # Change references of the form ['name'] or [['name']] into $name, also to
  # simplify matters
  re.brack <- '\\[{1,2}[\'\"]'
  if (grepl(re.brack, arg.str, perl = TRUE)) {
    arg.str <- gsub(re.brack, "$", arg.str, perl = TRUE)
    arg.str <- gsub('[\'\"]\\]{1,2}', "", arg.str, perl = TRUE)
  }

  # Next, isolate the indexing found in the last pair of brackets
  re.index <- "(.*?)\\[(.*?)\\]$"
  if (grepl(re.index, arg.str, perl = TRUE)) {
    indexes <- sub(re.index, "\\2", arg.str, perl = TRUE)

    # Indexing with 2 elements (rows, columns) is identified by this regex;
    # [1:10,] or [,"Species"] style indexes with a recognised column part
    # also match
    re.split.index <- "^(.+)?,+(c\\(.*\\)|\\d+|\\d+:\\d+|'.*'|\".+\")$"

    if (grepl(re.split.index, indexes, perl = TRUE)) {
      output$rows.subset <- sub(re.split.index, "\\1", indexes, perl = TRUE)
      output$col.index <- sub(re.split.index, "\\2", indexes, perl = TRUE)
      # Drop any empty string
      if (nchar(output$rows.subset) == 0) {
        output$rows.subset <- NULL
      }
      if (nchar(output$col.index) == 0) {
        output$col.index <- NULL
      }
    } else if (substring(indexes, 1, 1) == ",") {
      # Leading comma: only a column index is present. It must be stored as
      # col.index (not col.indexes) because the lookup further down checks
      # for that exact name.
      output$col.index <- sub("^,", "", indexes, perl = TRUE)
    } else if (substring(indexes, nchar(indexes), nchar(indexes)) == ",") {
      # Trailing comma: only a row index is present
      output$rows.subset <- sub(",$", "", indexes, perl = TRUE)
    } else {
      # No comma at all: re-evaluate the (normalised) string. If the result is
      # a data frame the index refers to columns, otherwise to rows.
      x.tmp <- eval(parse(text = arg.str))
      if (is.data.frame(x.tmp)) {
        output$col.index <- indexes
      } else {
        output$rows.subset <- indexes
      }
    }

    # Remove from the string what has already been accounted for
    arg.str <- sub(re.index, "\\1", arg.str, perl = TRUE)
  }

  # Split what is left by "$" to identify the nested structures
  output$data.struct <- strsplit(arg.str, "$", fixed = TRUE)[[1]]

  if (is.data.frame(x)) {
    # For a data frame, the last element of the data structure should
    # normally be the data frame's name
    output$df.name <- tail(output$data.struct, 1)
    output$col.names <- colnames(x)
  } else if ("col.index" %in% names(output)) {
    # The vector is referred to via column indexing: recover the column's name
    # by an evaluation of the form colnames(df[col.index])
    output$var.name <- eval(parse(text = paste("colnames(", arg.str, "[", output$col.index, "])")))
    output$df.name <- tail(output$data.struct, 1)
  } else {
    # No column indexing: the vector's name is the last element of
    # data.struct, with the data frame's name one level higher unless the
    # vector is "standalone"
    output$var.name <- tail(output$data.struct, 1)
    if (length(output$data.struct) > 1) {
      output$df.name <- output$data.struct[length(output$data.struct) - 1]
    }
  }

  # Remove var.name and df.name from data.struct to avoid redundancy
  output$data.struct <- setdiff(output$data.struct, output$var.name)
  output$data.struct <- setdiff(output$data.struct, output$df.name)
  if (length(output$data.struct) == 0) {
    output$data.struct <- NULL
  }

  # Final validation: a name that still contains a bracket or parenthesis is
  # not a plain name, so drop it
  if (isTRUE(grepl('[\\(\\[]', output$df.name))) {
    output$df.name <- NULL
  }
  if (isTRUE(grepl('[\\(\\[]', output$var.name))) {
    output$var.name <- NULL
  }

  return(output)
}
'f <- function(x) as.character(match.call()[-1])'는 어떻습니까? 'as.character' 대신 'as.list'를 사용할 수도 있습니다. 다만 여기서 무엇을 하려는지는 분명하지 않습니다. 예제를 제공해 주시면 도움이 될 것 같습니다. –
그것이 바로 제가 하려는 것입니다. 맥락은 [이 페이지](https://github.com/dcomtois/summarytools)를 참조하십시오. 결국에는 데이터 프레임의 이름과 변수의 이름을 정확하게 확인하고 식별하려고 합니다. 정규 표현식을 사용하는 것 외에는 다른 방법이 없다고 생각합니다. –