R timeout with read.table() and large datafiles

I tried everything I could think of, but only one trick worked.

# ------------------------------------------
# Gabriel NEJM suppl data
# m@wjst.de 29Sep10
# ------------------------------------------
#
# Walk-through of several attempts to load one very wide, very long
# tab-separated file into R. Each section keeps the author's verdict
# on whether the approach worked.

d <- "/Users/.../NEJM/"
f1 <- "/Users/... /36studies_format_repository_NEJM.txt"

# Attempt 1 ----
# All the usual read.table() tricks: drop unwanted columns with "NULL",
# give explicit colClasses, and disable comment-character scanning.
cc <- rep("NULL", 276)
cc[c(2, 4, 5)] <- "character"  # keep only columns 2, 4 and 5
ds <- read.table(
  file = f1,
  sep = "\t",
  header = TRUE,
  na.strings = "",  # spelled out; the original relied on partial matching
  dec = ".",
  comment.char = "",
  colClasses = cc
)
# ... but does not run due to memory problems with 567,589 rows and 276 cols

# Attempt 2 ----
# Read only certain columns by letting the shell's `cut` pre-filter the file.
ds <- read.table(
  pipe("cut -f1,2,3,4 /Users/... /36studies_format_repository_NEJM.txt")
)
# this works but is not very handy...

# Attempt 3 ----
# Clever: make a SQL dataset on the fly - found at
# code.google.com/p/sqldf/#Example_6._File_Input
library(sqldf)
nejm <- file(f1)
ds <- sqldf(
  "select * from nejm limit 10",
  file.format = list(header = TRUE)
)
# ... but this also throws an error or silently crashes R

# Attempt 4 ----
# Converting to SQLite outside R finally worked ...
# The import is driven by an external script (nejm_make.sql, not shown here).
# try() keeps the script going if the shell command fails.
try(system("sqlite nejm.sqlite3 <nejm_make.sql", intern = TRUE, ignore.stderr = TRUE))

library(RSQLite)
# NOTE(review): the return value is discarded; presumably this call is here
# to initialise the driver with these settings before dbDriver() -- confirm
# against the installed RSQLite version.
SQLite(
  max.con = 16,
  fetch.default.rec = 100000,
  force.reload = FALSE,
  shared.cache = FALSE
)
con <- dbDriver("SQLite")
dbc <- dbConnect(con, dbname = "/Users/.../nejm.sqlite3")
SQL <- "SELECT * FROM nejm"
rs <- dbSendQuery(dbc, SQL)
nejm <- fetch(rs, n = -1)  # n = -1: retrieve all pending rows
# Release the result set now that every row has been fetched.
dbClearResult(rs)
# `dbc` is left open for further queries; call dbDisconnect(dbc) when done.