我有一个具有NA值的数据框
df <- data.frame("About" = c("Ram","Std 8",NA,NA,NA,"John", "Std 9", NA, NA,NA,NA),
"Questions" = c(NA,NA,"Q1","Q2","Q3",NA,NA,"Q1","Q2","Q3","Q4"),
"Ratings" = c(NA,NA,7,7,7,NA,NA,7,7,7,7), stringsAsFactors = FALSE)
预期输出如下:
expectedOutput <- data.frame("About" = c("Ram","John"),
"Standard" = c("Std 8", "Std 9"),
"Q1" = c(7,7),
"Q2" = c(7,7),
"Q3" = c(7,7),
"Q4" = c(0,7))
我尝试使用reshape
功能实现此目的
DataTransform <- reshape(df, idvar = "About", v.names = "Ratings", timevar = "Questions", direction = "wide")
任何人都可以通过重塑给定的数据框来帮助我实现预期的输出吗?
谢谢!!
在使用reshape或pivot_wider之前,我们需要转换适合这种转换的数据。
library(tidyverse) #for all the awesome packages
library(janitor) #to clean names
df <- data.frame("About" = c("Ram","Std 8",NA,NA,NA,"John", "Std 9", NA, NA,NA,NA),
"Questions" = c(NA,NA,"Q1","Q2","Q3",NA,NA,"Q1","Q2","Q3","Q4"),
"Ratings" = c(NA,NA,7,7,7,NA,NA,7,7,7,7), stringsAsFactors = FALSE)
df %>%
as_tibble() -> df # I like to work with tibble
df
#> # A tibble: 11 x 3
#> About Questions Ratings
#> <chr> <chr> <dbl>
#> 1 Ram <NA> NA
#> 2 Std 8 <NA> NA
#> 3 <NA> Q1 7
#> 4 <NA> Q2 7
#> 5 <NA> Q3 7
#> 6 John <NA> NA
#> 7 Std 9 <NA> NA
#> 8 <NA> Q1 7
#> 9 <NA> Q2 7
#> 10 <NA> Q3 7
#> 11 <NA> Q4 7
#I found I can remove a column out from the above tibble, the below function moves the values to the left if there is a NA
t(apply(df, 1, function(x) c(x[!is.na(x)], x[is.na(x)]))) -> df[]
df
#> # A tibble: 11 x 3
#> About Questions Ratings
#> <chr> <chr> <chr>
#> 1 Ram <NA> <NA>
#> 2 Std 8 <NA> <NA>
#> 3 Q1 " 7" <NA>
#> 4 Q2 " 7" <NA>
#> 5 Q3 " 7" <NA>
#> 6 John <NA> <NA>
#> 7 Std 9 <NA> <NA>
#> 8 Q1 " 7" <NA>
#> 9 Q2 " 7" <NA>
#> 10 Q3 " 7" <NA>
#> 11 Q4 " 7" <NA>
df %>%
clean_names() %>% # no capitals
dplyr::select(-ratings) %>% # removing the extra columns
mutate(questions = questions %>% parse_number()) -> df1 # make the second column numeric
df1
#> # A tibble: 11 x 2
#> about questions
#> <chr> <dbl>
#> 1 Ram NA
#> 2 Std 8 NA
#> 3 Q1 7
#> 4 Q2 7
#> 5 Q3 7
#> 6 John NA
#> 7 Std 9 NA
#> 8 Q1 7
#> 9 Q2 7
#> 10 Q3 7
#> 11 Q4 7
# this for loop will get me a vector for the name column which I can use to append it to the df
name <- as.character()
for(i in 1:nrow(df1)){
if(is.na(df1[i,2])){
if(is.na(df1[i+1,2])){
name <- c(name , as.character(df1[i,1]))
} else {
name <- c(name, NA)
}
} else {
name <- c(name, NA)
}
}
name
#> [1] "Ram" NA NA NA NA "John" NA NA NA NA
#> [11] NA
name %>%
enframe(name = NULL, value = "name") -> name_df #converting vector to tibble
name_df
#> # A tibble: 11 x 1
#> name
#> <chr>
#> 1 Ram
#> 2 <NA>
#> 3 <NA>
#> 4 <NA>
#> 5 <NA>
#> 6 John
#> 7 <NA>
#> 8 <NA>
#> 9 <NA>
#> 10 <NA>
#> 11 <NA>
df1 %>%
bind_cols(name_df)%>% #binding the new column to the original df
mutate(std = ifelse(is.na(questions) & is.na(name), about, NA)) %>% # mutating a new column for standard
fill(name) %>% # this will fill the NA with non NA previous value
fill(std) %>%
drop_na(questions) %>% # dropping unnecessary rows
pivot_wider(names_from = "about", values_from = "questions") -> final_df # now I can use pivot_wider to get the expected result
final_df
#> # A tibble: 2 x 6
#> name std Q1 Q2 Q3 Q4
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Ram Std 8 7 7 7 NA
#> 2 John Std 9 7 7 7 7
由reprex package(v0.3.0)在2020-06-13创建