# NOT RUN { # Basic connection using DBI ------------------------------------------- if (require(dbplyr, quietly = TRUE)) { con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") copy_to(con, mtcars) DBI::dbListTables(con) # To retrieve a single table from a source, use `tbl()` con %>% tbl("mtcars") # You can also use pass raw SQL if you want a more sophisticated query con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl == 8")) # To show off the full features of dplyr's database integration, # we'll use the Lahman database. lahman_sqlite() takes care of # creating the database. lahman_p <- lahman_sqlite() batting <- lahman_p %>% tbl("Batting") batting # Basic data manipulation verbs work in the same way as with a tibble batting %>% filter(yearID > 2005, G > 130) batting %>% select(playerID:lgID) batting %>% arrange(playerID, desc(yearID)) batting %>% summarise(G = mean(G), n = n()) # There are a few exceptions. For example, databases give integer results # when dividing one integer by another. Multiply by 1 to fix the problem batting %>% select(playerID:lgID, AB, R, G) %>% mutate( R_per_game1 = R / G, R_per_game2 = R * 1.0 / G ) # All operations are lazy: they don't do anything until you request the # data, either by `print()`ing it (which shows the first ten rows), # or by `collect()`ing the results locally. system.time(recent <- filter(batting, yearID > 2010)) system.time(collect(recent)) # You can see the query that dplyr creates with show_query() batting %>% filter(G > 0) %>% group_by(playerID) %>% summarise(n = n()) %>% show_query() } # } Run the code above in your browser using DataLab