r - Use multiple predictors in linear model with purrr map2 function -
my question similar this one, trying use model multiple predictors , can't figure out how newdata predict function.
library(dplyr) library(lubridate) library(purrr) library(tidyr) library(broom) set.seed(1234) first create seq of weeks
wks = seq(as.date("2010-01-01"), sys.date(), by="1 week") then grab current year
cur_year <- year(sys.date()) here create data frame dummy data
my_data <- data.frame( week_ending = wks ) %>% mutate( ref_period = week(week_ending), yr = year(week_ending), pct.excellent = round(runif(length(wks), 0, 100),0), pct.good = round(runif(length(wks), 0, 100),0), pct.fair = round(runif(length(wks), 0, 100),0), pct.poor = round(runif(length(wks), 0, 100),0), pct.very.poor = round(runif(length(wks), 0, 100),0), pct_trend = round(runif(length(wks), 75, 125),0) ) next create nested dataframe has data each week of year 1 group.
cond_model <- my_data %>% filter(yr != cur_year) %>% group_by(ref_period) %>% nest(.key=cond_data) here join year's data previous years' data week of year.
cond_model <- left_join( cond_model, my_data %>% filter(yr==cur_year) %>% select(week_ending, ref_period, pct.excellent, pct.fair, pct.good, pct.poor, pct.very.poor), = c("ref_period") ) and adds linear model data frame each week of year
cond_model <- cond_model %>% mutate(model = map(cond_data, ~lm(pct_trend ~ pct.excellent + pct.good + pct.fair + pct.poor + pct.very.poor, data = .x))) now use model each week predict using year's data. tried following:
cond_model <- cond_model %>% mutate( pred_pct_trend = map2_dbl(model, pct.excellent + pct.good + pct.fair + pct.poor + pct.very.poor, ~predict(.x, newdata = data.frame(.y))) ) that gives following error:
error in mutate_impl(.data, dots) : object 'pct.excellent' not found i tried nesting predictors in data frame...
create data frame year's data , nest predictors
cur_cond <- my_data %>% filter(yr==cur_year) %>% select(week_ending, pct.excellent, pct.good, pct.fair, pct.poor, pct.very.poor) %>% group_by(week_ending) %>% nest(.key=new_data) %>% mutate(new_data=map(new_data, ~data.frame(.x))) join main data frame
cond_model <- left_join(cond_model, cur_cond) now try prediction again:
cond_model <- cond_model %>% mutate( pred_pct_trend = map2_dbl(model, new_data, ~predict(.x, newdata = data.frame(.y))) ) i same error before:
error in mutate_impl(.data, dots) : object 'pct.excellent' not found i think answer involve performing flatten() on predictors, can't figure out goes in workflow.
cond_model$new_data[1] vs.
flatten_df(cond_model$new_data[1]) and @ point have run out of ideas.
once prediction dataset added in, main issue how deal weeks don't have prediction data (weeks 31-53).
you'll see when join 2 datasets, rows without prediction dataset filled null. can use ifelse statement give predictions of na these rows.
# modeling data cond_model = my_data %>% filter(yr != cur_year) %>% group_by(ref_period) %>% nest(.key = cond_data) # create prediction data cur_cond = my_data %>% filter(yr == cur_year) %>% group_by(ref_period) %>% nest( .key = new_data ) # join these cond_model = left_join(cond_model, cur_cond) # models cond_model = cond_model %>% mutate(model = map(cond_data, ~lm(pct_trend ~ pct.excellent + pct.good + pct.fair + pct.poor + pct.very.poor, data = .x) ) ) put ifelse in return na when there no prediction data.
# predictions cond_model %>% mutate(pred_pct_trend = map2_dbl(model, new_data, ~ifelse(is.null(.y), na, predict(.x, newdata = .y) ) ) ) # tibble: 53 x 5 ref_period cond_data new_data model pred_pct_trend <dbl> <list> <list> <list> <dbl> 1 1 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 83.08899 2 2 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 114.39089 3 3 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 215.02055 4 4 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 130.24556 5 5 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 112.86516 6 6 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 107.29866 7 7 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 52.11526 8 8 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 106.22482 9 9 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 128.40858 10 10 <tibble [7 x 8]> <tibble [1 x 8]> <s3: lm> 108.10306
Comments
Post a Comment