Run DrVAEN on dataset: GSE32646


1. Dataset summary

The purpose of this study was to investigate the association of glutathione S-transferase P1 (GSTP1) expression with resistance to neoadjuvant paclitaxel followed by 5-fluorouracil/epirubicin/cyclophosphamide (P-FEC) in human breast cancers.

2. Get and prepare dataset

The dataset can be accessed using the code:

  #setwd("/path/to/GSE32646")
  library("GEOquery")
  library("limma")

  ###################################################################################################
  # code chunk: GSE32646
  ###################################################################################################
  rm(list=ls())

  gse <- getGEO("GSE32646", GSEMatrix = TRUE)
  exprs(gse[[1]]) -> datExpr0

  ########## section 1

  library('preprocessCore')
  normalize.quantiles(datExpr0) -> datExprLQ
  dimnames(datExprLQ) = dimnames(datExpr0)

  ########## section 1 end

  ########## section 2
  gpl = getGEO("GPL570")
  Table(gpl) -> anno

  anno[,11] -> ss
  grep("/", anno[,11] ) -> ii
  anno = anno[-ii, ]

  datExprLQ = datExprLQ[which(rownames(datExprLQ) %in% anno[,1]),]

  match(rownames(datExprLQ), anno[,1]) -> ii
  symbol = anno[ii, 11]

  apply(datExprLQ, 2, function(u)tapply(u, symbol, median)) -> expr.mat
  expr.mat = expr.mat[-1,]

  pData(gse[[1]]) -> pheno.anno

  save(expr.mat, pheno.anno, file="GSE32646.RData")

  write.table(format(expr.mat, digits=4),file="GSE32646.Expr.tsv", row.names=T, quote=F, sep="\t")
              

The genes will be mapped to our backend gene list that are used for drug response prediction. The full gene list is here

3. Predict drug responses using DrVAEN.

Once the gene expression matrix file is genereted, we can upload the file to DrVAEN to get the predicted drug response data.

Then download the predicted responses for further analysis.

4. Further analysis: Compare the drug responses between the two groups (pCR vs RD).

Here we present the example codes for the analysis.

library(glmnet)
load("GSE32646.RData")

## CCLE
drug_response = read.table("ranksigmoid_CCLE.pred.txt", as.is=T, header=T)

### all
dat = data.frame(cbind(Response = drug_response[, "Paclitaxel"], pCR=pheno.anno[,50]))
dat[,1] = as.numeric(as.character(dat[,1]))

pvalue = t.test(dat[,1] ~ dat[,2])$p.value

##### plot
library("ggplot2")

give.n <- function(x){
   return(c(y = max(x), label = length(x)))
}

p1 = ggplot(dat, aes(x=pCR, y=Response, fill=pCR)) + geom_boxplot() + 
     labs(title=paste("GSE32646, CCLE\n", "p = ", format(pvalue, digits=3)), x="pCR Status", y = "Predicted Response to Paclitaxel") +
   theme(legend.position = "none", plot.title = element_text(hjust=0.5)) +
   stat_summary(fun.data = give.n, geom = "text")

png("CCLE.png")
print(p1)
dev.off()

## To analyse the predicted drug response data from GDSC model, please just change "CCLE" to "GDSC" in the above codes.