library(ggplot2) df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age$age = as.factor(df_basdai_age$age) levels(df_basdai_age$age) <- c("20 years", "40 years", "60 years", "80 years") df_basdai_age$time = as.numeric(as.character(df_basdai_age$time)) df_basdai_age <- mutate(df_basdai_age, ymin = yvar - qt(0.975, df)*(SE/sqrt(df))) df_basdai_age <- mutate(df_basdai_age, ymax = yvar + qt(0.975, df)*(SE/sqrt(df))) #Plot the graph p <- ggplot(df_basdai_age, aes(x = time, y = yvar, color = tvar)) + geom_line(size = 1) + geom_point() + geom_errorbar(aes(ymin=ymin, ymax=ymax), width=.05) + labs(x = "Time (years)", y = "") + theme_cowplot() + facet_wrap(~ age, nrow = 2) + theme(legend.position="none") # show plot p df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age$age = as.factor(df_basdai_age$age) levels(df_basdai_age$age) <- c("20 years", "40 years", "60 years", "80 years") df_basdai_age$time = as.numeric(as.character(df_basdai_age$time)) df_basdai_age <- mutate(df_basdai_age, ymin = yvar - qt(0.975, df)*(SE/sqrt(df))) df_basdai_age <- mutate(df_basdai_age, ymax = yvar + qt(0.975, df)*(SE/sqrt(df))) library(ggplot2) df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age$age = as.factor(df_basdai_age$age) levels(df_basdai_age$age) <- c("20 years", "40 years", "60 years", "80 years") df_basdai_age$time = as.numeric(as.character(df_basdai_age$time)) df_basdai_age <- mutate(df_basdai_age, ymin = yvar - qt(0.975, df)*(SE/sqrt(df))) df_basdai_age <- mutate(df_basdai_age, ymax = yvar + qt(0.975, df)*(SE/sqrt(df))) #Plot the graph p <- ggplot(df_basdai_age, aes(x = time, y = yvar, color = tvar)) + geom_line(size = 1) + geom_point() + geom_errorbar(aes(ymin=ymin, ymax=ymax), width=.05) + labs(x = "Time (years)", y = "") + theme_cowplot() + facet_wrap(~ age, nrow = 2) + theme(legend.position="none") # show plot p Calculating Confidence intervals of marginal means for a linear mixed model using emmeans package
I have built this model in R:
library(emmeans) library(lmerTest) mixed_age_interaction = lmer(basdai ~ 1 + gender + time + age + gender*time + time*age + gender*age + gender*time*age + (1|ID) + (1|country), data = dat, REML = F, control=lmerControl(optimizer="bobyqa")) In this model, I am predicting the treatment response basdai (scale 0 to 100), using gender (male/female), time (0, 0.5, 1 and 2 years, time as a categorical variable), age (18-90 years), and the two-way and three-way interactions of these variables. The linear mixed model takes into account correlations within subjects and subjects within countries.
I used the following code to plot the relationship between sex, time and age:
emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), plotit= TRUE) This gives me the following plot (blue = female, red = male): 
The plot looks good, although it may be confusing for viewers to see that time is not plotted at the 1.5 year mark since it is not a measured category. Moreover, it would be helpful to add labels at the top of each subplot indicating the age group being displayed (e.g. "20 years", "40 years", etc.) to make it more clear.
Therefore I have tried to substract the data from the emmip() function by using plotit = FALSE, and subsequently trying to create the plot to my liking. However, I am struggling to calculate the confidence intervals of the estimated marginal means. I asked chatGPT for help which has provided me with the formula, but the CI's seem to small.
Here is what I have tried so far:
df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age <- emmip(basdai_age,~ gender ~ time | age, at = list(age = c(20, 40, 60, 80)), facetlab = "label_both", plotit= FALSE) df_basdai_age$age = as.factor(df_basdai_age$age) levels(df_basdai_age$age) <- c("20 years", "40 years", "60 years", "80 years") df_basdai_age$time = as.numeric(as.character(df_basdai_age$time)) df_basdai_age <- mutate(df_basdai_age, ymin = yvar - qt(0.975, df)*(SE/sqrt(df))) df_basdai_age <- mutate(df_basdai_age, ymax = yvar + qt(0.975, df)*(SE/sqrt(df))) Here is the complete dataset:
structure(list(gender = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("Male", "Female" ), class = "factor"), time = c(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2), age = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L), levels = c("20 years", "40 years", "60 years", "80 years"), class = "factor"), yvar = c(53.2222132545591, 55.644671238144, 53.979760945038, 58.0003674613788, 54.737308635517, 60.3560636846136, 55.4948563259959, 62.7117599078483, 18.8785653284777, 27.3140001897482, 25.9035670388476, 33.6311637386693, 32.9285687492176, 39.9483272875904, 39.9535704595875, 46.2654908365116, 17.1280381612022, 25.6617577361121, 23.8414745169542, 31.7675512043731, 30.5549108727061, 37.8733446726342, 37.268347228458, 43.9791381408953, 15.8476044974251, 25.0206920526655, 23.029545900933, 30.9838114430519, 30.2114873044409, 36.9469308334382, 37.3934287079488, 42.9100502238246), SE = c(1.48085388286971, 1.54315837900391, 1.41163641601991, 1.42678010979953, 1.4630053643472, 1.49892114510165, 1.62355480278176, 1.73623936576435, 1.47800196744245, 1.54052855366631, 1.41101870318731, 1.42580231659991, 1.46095633844491, 1.4978877623696, 1.61701847301061, 1.73363627458762, 1.48984952535246, 1.58248569215204, 1.41398647326204, 1.43575219607306, 1.47296775812952, 1.52128548685686, 1.65241653887761, 1.80638768204503, 1.51339032203046, 1.65536350490358, 1.41906539783371, 1.45202624858626, 1.4940999289118, 1.56932417176622, 1.71642484529658, 1.95023658100379), df = c(19.1992529938252, 22.636661796014, 15.8557024206515, 16.5459552649169, 18.2937320570655, 20.157114181664, 27.7433329713143, 36.2839788594381, 19.051888835057, 22.483155277679, 15.828184146664, 16.5012376120077, 18.1918644929504, 20.10278950016, 27.3001430927299, 36.0701453085591, 19.669221418473, 25.0336574427524, 15.9616355502659, 16.9666087914172, 18.7980095965655, 21.3885230873732, 29.7715565509346, 42.514368203954, 20.9429853491516, 29.973619949575, 16.1930278685159, 17.7496566912749, 19.9012388133609, 24.2214091721269, 34.6602465546453, 57.7544903350949), tvar = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("Male", "Female"), class = "factor"), xvar = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), levels = c("0", "0.5", "1", "2"), class = "factor"), ymin = c(52.5153436652014, 54.9731205471491, 53.2276742716765, 57.258776742029, 54.0195058924355, 59.6599914019999, 54.8631940181502, 62.1273437446853, 18.1699669372756, 26.6410506766869, 25.1510488995009, 32.8889200571732, 32.2094835380351, 39.2516759650496, 39.3188965541991, 45.6801042113467, 16.4265441774652, 25.0104024242387, 23.0910485329519, 31.0320361368517, 29.8433247363632, 37.1900266089296, 36.649658499789, 43.4202483189333, 15.1597661980371, 24.4031685566235, 22.2826943822545, 30.2589939247067, 29.5126365026472, 36.2891347676435, 36.8013488320248, 42.3963183501654), ymax = c(53.9290828439169, 56.316221929139, 54.7318476183996, 58.7419581807286, 55.4551113785984, 61.0521359672273, 56.1265186338416, 63.2961760710114, 19.5871637196797, 27.9869497028096, 26.6560851781943, 34.3734074201655, 33.6476539604, 40.6449786101312, 40.5882443649759, 46.8508774616764, 17.8295321449393, 26.3131130479854, 24.5919005009564, 32.5030662718946, 31.2664970090489, 38.5566627363388, 37.8870359571269, 44.5380279628573, 16.5354427968132, 25.6382155487075, 23.7763974196115, 31.7086289613971, 30.9103381062346, 37.6047268992329, 37.9855085838727, 43.4237820974838)), estName = "yvar", pri.vars = c("gender", "time", "age"), adjust = "none", side = 0, delta = 0, type = "link", mesg = "Degrees-of-freedom method: satterthwaite", row.names = c(1L, 2L, 9L, 10L, 17L, 18L, 25L, 26L, 3L, 4L, 11L, 12L, 19L, 20L, 27L, 28L, 5L, 6L, 13L, 14L, 21L, 22L, 29L, 30L, 7L, 8L, 15L, 16L, 23L, 24L, 31L, 32L), labs = list(xlab = "Levels of time", ylab = "Linear prediction", tlab = "gender"), vars = list( byvars = "age", tvars = "gender"), class = "data.frame") Question: If I have the standard error, marginal mean, degrees of freedom, how do I calculate the ymin and ymax for a marginal mean of a linear mixed model? Is the formula used here correct?
