############################## ############################## ## ABRF iPRG2015 study : Protein siginificance analysis with MSstats ## Date: March 18, 2017 ## Created by Meena Choi ############################## ############################## ############################## ## 0. Load MSstats ############################## library(MSstats) ?MSstats ############################## ## 1. Read data ############################## # read skyline output raw <- read.csv(file="iPRG_10ppm_2rt_15cut_nosingle.csv") # read annotation table annot <- read.csv("iPRG_skyline_annotation.csv", header=TRUE) annot # reformating and pre-processing for Skyline output. quant <- SkylinetoMSstatsFormat(raw, annotation=annot) head(quant) ############################## ## 2. Processing data ## Transformation = log2 ## Normalization = equalize median ## Model-based run-level summarization (TMP) after imputation ############################## quant.processed <- dataProcess(raw = quant, logTrans=2, normalization = 'equalizeMedians', summaryMethod = 'TMP', MBimpute=TRUE, censoredInt='0', cutoffCensored='minFeature', maxQuantileforCensored = 0.999) # show the name of outputs names(quant.processed) # show reformated and normalized data. # 'ABUNDANCE' column has normalized log2 transformed intensities. head(quant.processed$ProcessedData) # This table includes run-level summarized log2 intensities. (column : LogIntensities) # Now one summarized log2 intensities per Protein and Run. # NumMeasuredFeature : show how many features are used for run-level summarization. # If there is no missing value, it should be the number of features in certain protein. # MissingPercentage : the number of missing features / the number of features in certain protein. head(quant.processed$RunlevelData) # show which summarization method is used. head(quant.processed$SummaryMethod) ############################## ## 3. Data visualization ############################## dataProcessPlots(data = quant.processed, type="QCplot", width=7, height=7, which.Protein = 1, address='iPRG_skyline_equalizeNorm_') # It will generate profile plots per protein. It will take a while dataProcessPlots(data = quant.processed, type="Profileplot", featureName="NA", width=7, height=7, summaryPlot = TRUE, originalPlot = FALSE, address="iPRG_skyline_equalizeNorm_") # Instead, make profile plot for only some. dataProcessPlots(data = quant.processed, type="Profileplot", featureName="NA", width=7, height=7, which.Protein = 'sp|P44015|VAC2_YEAST', address="iPRG_skyline_equalizeNorm_P44015") # sp|P55752|ISCB_YEAST # sp|P44374|SFG2_YEAST # sp|P44983|UTR6_YEAST # sp|P44683|PGA4_YEAST # sp|P55249|ZRT4_YEAST dataProcessPlots(data = quant.processed, type="conditionplot", width=7, height=7, address="iPRG_skyline_equalizeNorm_") ############################## ## 4. Model-based comparison and adjustment for multiple testing ############################## unique(quant.processed$ProcessedData$GROUP_ORIGINAL) comparison1<-matrix(c(-1,1,0,0),nrow=1) comparison2<-matrix(c(-1,0,1,0),nrow=1) comparison3<-matrix(c(-1,0,0,1),nrow=1) comparison4<-matrix(c(0,-1,1,0),nrow=1) comparison5<-matrix(c(0,-1,0,1),nrow=1) comparison6<-matrix(c(0,0,-1,1),nrow=1) comparison<-rbind(comparison1, comparison2, comparison3, comparison4, comparison5, comparison6) row.names(comparison)<-c("C2-C1","C3-C1","C4-C1","C3-C2","C4-C2","C4-C3") test <- groupComparison(contrast.matrix=comparison, data=quant.processed) names(test) # Show test result # Label : which comparison is reported. # log2FC : estimated log2 fold change between conditions. # adj.pvalue : adjusted p value by BH # issue : detect whether this protein has any issue for comparison # such as, there is measurement in certain group, or no measurement at all. # MissingPercentage : the number of missing intensities/total number of intensities # in conditions your are interested in for comparison # ImputationPercentage : the number of imputed intensities/total number of intensities # in conditions your are interested in for comparison head(test$ComparisonResult) # After fitting linear model, residuals and fitted values can be shown. head(test$ModelQC) # Fitted model per protein head(test$fittedmodel) # save testing result as .csv file Skyline.intensity.comparison.result <- test$ComparisonResult write.csv(Skyline.intensity.comparison.result, file='testResult_iprg_skyline.csv') ############################## ## 5. Visualization for testing result ############################## groupComparisonPlots(Skyline.intensity.comparison.result, type="VolcanoPlot", address="testResult_iprg_skyline_") groupComparisonPlots(data = Skyline.intensity.comparison.result, type = 'VolcanoPlot', sig = 0.05, FCcutoff = 2^2, address = 'testResult_iprg_skyline_FCcutoff4_') groupComparisonPlots(Skyline.intensity.comparison.result, type="Heatmap", address="testResult_iprg_skyline_") groupComparisonPlots(Skyline.intensity.comparison.result, type="ComparisonPlot", address="testResult_iprg_skyline_") ############################## ## 6. Verify the model assumption ############################## # normal quantile-quantile plots modelBasedQCPlots(data=test, type="QQPlots", width=5, height=5, address="iprg_skyline_") # residual plots modelBasedQCPlots(data=test, type="ResidualPlots", width=5, height=5, address="iprg_skyline_") ############################## ## 7. Power calculation ############################## test.power <- designSampleSize(data = test$fittedmodel, desiredFC = c(1.1, 1.6), FDR = 0.05, power = TRUE, numSample = 3) test.power designSampleSizePlots(data = test.power) ############################## ## 8. Sample size calculation ############################## samplesize <- designSampleSize(data = test$fittedmodel, desiredFC = c(1.1, 1.6), FDR = 0.05, power = 0.9, numSample = TRUE) samplesize designSampleSizePlots(data = samplesize) ############################## ## 9. sample quantification ############################## sampleQuant <- quantification(quant.processed) head(sampleQuant)