## Make sure the project-level data/ directory exists before anything is
## downloaded or written into it.
if (!dir.exists(here::here("data"))) {
  dir.create(here::here("data"))
}
Downloading GEO data
Aims
In this report, I download the 3409 breast cancer bulk RNA-seq samples, together with their clinical annotations, from the study "Clinical Value of RNA Sequencing-Based Classifiers for Prediction of the Five Conventional Breast Cancer Biomarkers: A Report From the Population-Based Multicenter Sweden Cancerome Analysis Network—Breast Initiative" (GEO accession GSE96058).
Get clinical features
## Fetch the full GEO series record. With GSEMatrix = FALSE the result is an
## object whose `gsms` slot is a list of per-sample (GSM) entries, which is
## what the two extractions below iterate over.
geo <- getGEO(GEO = "GSE96058", GSEMatrix = FALSE)

## Clinical annotations.
## Each sample's `characteristics_ch1` header is pulled out and stacked into
## long form (`values` = the raw "feature: value" string, `ind` = the list
## name of the sample it came from). `stack()` returns `ind` as a factor, so
## it is converted to character to match the key type used for the join
## below.
## `extra = "merge"` splits on the first ": " only: a value that itself
## contains ": " is kept intact instead of being truncated with a warning.
meta <- purrr::map(geo@gsms, \(gsm) gsm@header$characteristics_ch1) |>
  stack() |>
  mutate(ind = as.character(ind)) |>
  tidyr::separate(
    values,
    into = c("feature", "value"),
    sep = ": ",
    extra = "merge"
  ) |>
  tidyr::pivot_wider(names_from = feature, values_from = value) |>
  janitor::clean_names()

## Sample titles, keyed by the same list names (`ind`).
## The object name `sample` is kept as-is because code outside this section
## may refer to it.
sample <- purrr::map(geo@gsms, \(gsm) gsm@header$title) |>
  stack() |>
  as_tibble() |>
  mutate(ind = as.character(ind))

## Attach the annotations to the titles and persist the table.
## `write_csv()` returns its input invisibly, so `meta` ends up holding the
## joined table that was written to disk.
meta <- left_join(sample, meta, by = "ind") |>
  write_csv(here::here("data/metadata.csv"))
Get gene expression profiles
## Download the GSE96058 supplementary file(s) whose name matches
## 'gene_expression' into data/, retrying when the download errors.
##
## `success` and `attempt` remain top-level variables: after a successful
## run `attempt` holds the number of the attempt that succeeded.
max_attempts <- 5L
retry_wait_secs <- 5

success <- FALSE
for (attempt in seq_len(max_attempts)) {
  ## tryCatch() yields TRUE when the download call returns normally and
  ## FALSE when it signals an error, so no `<<-` is needed in the handler.
  success <- tryCatch(
    {
      getGEOSuppFiles(
        "GSE96058",
        makeDirectory = FALSE,
        baseDir = here::here("data"),
        fetch_files = TRUE,
        filter_regex = "gene_expression"
      )
      TRUE
    },
    error = function(e) {
      message(
        "Download failed on attempt ", attempt, ": ", conditionMessage(e)
      )
      FALSE
    }
  )

  if (success) {
    message("Download successful on attempt ", attempt)
    break
  }

  ## Pause only when another attempt will follow; sleeping after the final
  ## failure would just delay the error below.
  if (attempt < max_attempts) {
    Sys.sleep(retry_wait_secs)
  }
}

if (!success) {
  ## Report the number of attempts actually made (the previous counter had
  ## already been incremented past the limit, so it reported one too many).
  stop(
    sprintf("Download failed after %d attempts", max_attempts),
    call. = FALSE
  )
}