Packages
# Packages attached for the analysis below. One call per line: R needs a
# newline (or `;`) between top-level expressions.
library(ggplot2)
library(dplyr)
library(caret)
library(plotROC)
library(pROC)
library(ROCR)

# 3 regression models
# Three logistic regressions (family = binomial) for the outcome USI3.
# Each model uses the same adjustment covariates (AGE3, SEXE3, RL3, ATB3,
# OXYGENE3, NEW3) and differs only in the categorised score entered first.

# Model 1: score of interest is SBEcl3.
modele1 <- glm(
  USI3 ~ SBEcl3 + AGE3 + SEXE3 + RL3 + ATB3 + OXYGENE3 + NEW3,
  data = data3,
  family = binomial
)

# Model 2: score of interest is LACTATES3.
modele2 <- glm(
  USI3 ~ LACTATES3 + AGE3 + SEXE3 + RL3 + ATB3 + OXYGENE3 + NEW3,
  data = data3,
  family = binomial
)

# Model 3: score of interest is SOFA3.
modele3 <- glm(
  USI3 ~ SOFA3 + AGE3 + SEXE3 + RL3 + ATB3 + OXYGENE3 + NEW3,
  data = data3,
  family = binomial
)

# (data3 defined at end of post)
Prediction models
# Predictions of each fitted model on the data it was fitted to (no
# `newdata` is supplied). With the default `type = "link"`, predict() on a
# binomial glm returns values on the log-odds scale, not probabilities.
predict(modele1)
predict(modele2)
predict(modele3)

# 3 ROC Curves
# One ROC object per model: observed outcome (data3$USI3) against the
# model's in-sample linear predictor. The list is named so that each curve
# can be identified by its model when plotted.
# NOTE(review): roc() and ggroc() are presumably the pROC versions; caret
# and ROCR are also attached, so confirm nothing masks them.
rocs <- list(
  modele1 = roc(data3$USI3, predict(modele1)),
  modele2 = roc(data3$USI3, predict(modele2)),
  modele3 = roc(data3$USI3, predict(modele3))
)

# Draw the three curves on a single plot.
ggroc(rocs, legacy.axes = TRUE)

# Results
I want to determine which score (SBE3, LACT3, SOF3) best discriminates the risk of USI (ICU) admission (USI3). I transformed these continuous variables into categorical ones and incorporated them into my models. I then made predictions from my regression models, but these predictions are quite poor (AUC1 = 0.74, AUC2 = 0.72, AUC3 = 0.79).
Does this method seem appropriate to you? How can I obtain better predictions? Is there another method you would suggest? How can I select in advance the threshold values of the different predictive factors for ICU admission using ROC curve analysis?
This is my data
# Sample of the analysis data set (20 rows, 15 columns), as produced by
# dput(). It is assigned to `data3` because the models above read their
# data from an object of that name.
#
# The first factor level is the reference category under R's default
# treatment contrasts, so the reference is "Q3" for SBEcl3 and "Q2" for
# VBEcl3, not "Q1".
data3 <- structure(
  list(
    # Outcome and categorised scores (factors).
    USI3 = structure(
      c(2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
        2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L),
      levels = c("non", "oui"), class = "factor"
    ),
    SBEcl3 = structure(
      c(4L, 3L, 3L, 1L, 3L, 4L, 4L, 1L, 2L, 4L,
        4L, 2L, 1L, 4L, 2L, 2L, 3L, 2L, 4L, 4L),
      levels = c("Q3", "Q1", "Q2", "Q4"), class = "factor"
    ),
    VBEcl3 = structure(
      c(3L, 2L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 3L,
        3L, 2L, 1L, 3L, 2L, 2L, 1L, 2L, 3L, 3L),
      levels = c("Q2", "Q1", "Q3"), class = "factor"
    ),
    SOFA3 = structure(
      c(1L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 1L,
        2L, 2L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 2L),
      levels = c("Q1", "Q2", "Q3"), class = "factor"
    ),
    LACTATES3 = structure(
      c(2L, 3L, 1L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
        2L, 3L, 2L, 1L, 1L, 3L, 3L, 3L, 1L, 1L),
      levels = c("Q1", "Q2", "Q3"), class = "factor"
    ),
    # Numeric versions of the scores.
    SBE3 = c(
      5.88755625371722, -5.83367949101861, -5.16855590395164,
      1.94584567145157, -4.46237190410756, 2.92799867956303,
      6.99250801087927, 0.414319711015225, -16.1438604651896,
      5.80900771418004, 5.08271450621078, -7.1973116153256,
      -1.30327738092311, 4.05722685252242, -15.2431163349488,
      -18.1380051674948, -4.84876594310452, -11.5092300657808,
      9.46936571654563, 16.4607368929384
    ),
    VBE3 = c(
      4.83306549999999, -5.17690968000001, -3.96411408000001,
      4.33224666999999, -3.45727284, 1.75496799999999,
      7.36360999999999, 0.618853640000002, -17.29309464,
      5.33871410999998, 6.48677951999998, -4.91616858000001,
      -0.740172520000016, 6.13709857999999, -15.35942096,
      -18.24841597, -3.61614233000002, -12.1641,
      10.34413375, 19.15572
    ),
    SOF3 = c(2, 5, 6, 6, 2, 6, 4, 8, 14, 1,
             3, 5, 3, 2, 4, 11, 2, 5, 4, 4),
    LACT3 = c(1.9, 3.4, 1.4, 0.8, 1.9, 18.4, 0.5, 2.2, 10.8, 0.8,
              2.4, 7.1, 1.9, 1.5, 1.1, 19, 3.1, 4.7, 1.1, 1),
    # Adjustment covariates.
    RL3 = structure(
      c(2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
        2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L),
      levels = c("non", "oui"), class = "factor"
    ),
    OXYGENE3 = structure(
      c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
        2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L),
      levels = c("non", "oui"), class = "factor"
    ),
    NEW3 = c(5, 6, 8, 6, 2, 11, 6, 7, 6, 7,
             12, 5, 10, 2, 1, 10, 5, 4, 4, 8),
    AGE3 = c(88, 53, 71, 73, 58, 71, 83, 77, 58, 77,
             35, 75, 57, 64, 68, 52, 58, 81, 78, 77),
    ATB3 = structure(
      c(2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
        2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L),
      levels = c("non", "oui"), class = "factor"
    ),
    SEXE3 = structure(
      c(1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
        2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L),
      levels = c("Femme", "Homme"), class = "factor"
    )
  ),
  row.names = c(NA, 20L),
  class = "data.frame"
)