Skip to contents
library(cd8ippred)
library(Biobase)
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
#>     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
#>     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
#>     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
#>     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
#>     table, tapply, union, unique, unsplit, which.max, which.min
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:Biobase':
#> 
#>     combine
#> The following objects are masked from 'package:BiocGenerics':
#> 
#>     combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

This is an example of how to apply CD8 immune phenotype model to data.

Warning: Only apply this to RNA-Seq data processed with Roche-internal RNA-Seq pipeline (e.g., processed EDIS data). When in doubt, compare your data distribution to the example data.

We are using simulated data sampled from the training set. First we need Variance-stabilizing transformation.

cd8ip_simulated_counts <- load_simulated_counts()
vst_m <- compute_vst(exprs(cd8ip_simulated_counts))
#> Warning: replacing previous import 'S4Arrays::makeNindexFromArrayViewport' by
#> 'DelayedArray::makeNindexFromArrayViewport' when loading 'SummarizedExperiment'
#> converting counts to integer mode

Create a data.frame.

# set rownames to symbol names where available;
# the model uses only a subset of genes with symbols available.
rownames(vst_m) <- ifelse(
    is.na(fData(cd8ip_simulated_counts)$symbol) | fData(cd8ip_simulated_counts)$symbol == "",
    rownames(vst_m),
    fData(cd8ip_simulated_counts)$symbol
)

# make rownames unqiue; the model uses only a subset of genes
# without symbol duplication issues
rownames(vst_m) <- make.names(rownames(vst_m), unique = TRUE)

# transform to a data.frame()
vst_df <- as.data.frame(t(vst_m))
# data preview
vst_df[1:6, 1:6]
#>        TSPAN6     TNMD     DPM1    SCYL3  C1orf112      FGR
#> ffc0 10.63564 5.578913 10.95236 9.547618  9.847032 9.118320
#> fd98 11.15683 5.691649 11.51214 9.101428 10.451604 7.894474
#> 46d7  9.26342 5.927867 11.25951 9.320227 11.390358 8.041969
#> c877  9.42878 5.283350 10.94896 9.794054 10.569744 7.353412
#> eddf 11.56120 5.283350 11.14481 9.083383  9.989589 7.390809
#> 3dba 10.66487 6.326243 10.20469 8.059314 10.308865 8.394831

Predict immune phenotypes

pred <- predict_cd8ip(vst_df)
pred$truth <- cd8ip_simulated_counts$CD8IMMPH |>
  tolower() |>
  # we convert truth to a factor for yardstick ROC computations
  factor()

Prediction quality

# values above 6 should be interpreted with caution
summary(pred$n_out_of_range)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.0000  0.0000  0.0000  0.0375  0.0000  1.0000

# low-confidence predictions are set to NA
summary(is.na(pred$predicted.class))
#>    Mode   FALSE 
#> logical      80

Confusion matrix

with(
  pred,
  table(truth, predicted.class)
)
#>           predicted.class
#> truth      desert excluded inflamed
#>   desert       15        5        0
#>   excluded      1       18        1
#>   inflamed      0        2       18

View data.frame with predictions

head(pred)
#>      prob.inflamed prob.excluded prob.desert predicted.class n_out_of_range
#> ffc0    0.10191362     0.1731693   0.7249171          desert              0
#> fd98    0.21714803     0.5062805   0.2765715        excluded              0
#> 46d7    0.19156818     0.5225662   0.2858657        excluded              0
#> c877    0.15159548     0.2483887   0.6000158          desert              0
#> eddf    0.10275997     0.3010333   0.5962067          desert              0
#> 3dba    0.05888132     0.2506064   0.6905123          desert              0
#>       truth
#> ffc0 desert
#> fd98 desert
#> 46d7 desert
#> c877 desert
#> eddf desert
#> 3dba desert

AUC scores

Please note, this is not real data. These AUC scores are only an example.

auc_df <- pred |>
  filter(!is.na(truth)) |>
  pivot_longer(starts_with("prob."), names_prefix = "prob.", names_to = ".level", values_to = "prob") |>
  mutate(positive = as.factor(truth == .level)) |>
  group_by(.level) |>
  yardstick::roc_auc(positive, prob, event_level = "second") |>
  rename(AUC = .estimate, class = .level) |>
  select(class, AUC) %>%
  mutate(AUC_text = paste0("AUC=", round(AUC, 3)))

select(auc_df, -AUC_text)
#> # A tibble: 3 × 2
#>   class      AUC
#>   <chr>    <dbl>
#> 1 desert   0.945
#> 2 excluded 0.854
#> 3 inflamed 0.996

ROC curve

pheno_col <- c(
  inflamed = "#d95f02",
  excluded = "#1b9e77",
  desert = "#7570b3"
)

pred |>
  filter(!is.na(truth)) |>
  # column names should match levels of the truth factor
  yardstick::roc_curve(truth, all_of(paste0("prob.", levels(pred$truth)))) |>
  mutate(class = .level) |>
  ggplot() +
  aes(x = 1 - specificity, y = sensitivity, col = class) +
  geom_path() +
  geom_abline(lty = 3) +
  coord_equal() +
  geom_text(aes(label = AUC_text), x = 0.55, y = 0.1, data = auc_df, show.legend = FALSE) +
  facet_wrap(~class, ncol = 2) +
  scale_color_manual(values = pheno_col) +
  theme_bw()

Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.1 (2023-06-16)
#>  os       Ubuntu 22.04.3 LTS
#>  system   x86_64, linux-gnu
#>  ui       X11
#>  language en
#>  collate  C.UTF-8
#>  ctype    C.UTF-8
#>  tz       UTC
#>  date     2023-09-05
#>  pandoc   2.19.2 @ /usr/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  ! package              * version   date (UTC) lib source
#>  P abind                  1.4-5     2016-07-21 [?] RSPM (R 4.3.0)
#>  P assertthat             0.2.1     2019-03-21 [?] RSPM (R 4.3.0)
#>  P Biobase              * 2.60.0    2023-04-25 [?] Bioconductor
#>  P BiocGenerics         * 0.46.0    2023-04-25 [?] Bioconductor
#>  P BiocManager            1.30.22   2023-08-08 [?] RSPM (R 4.3.0)
#>  P BiocParallel           1.34.2    2023-05-22 [?] Bioconductor
#>  P bitops                 1.0-7     2021-04-24 [?] RSPM (R 4.3.0)
#>  P bslib                  0.5.1     2023-08-11 [?] RSPM (R 4.3.0)
#>  P cachem                 1.0.8     2023-05-01 [?] RSPM (R 4.3.0)
#>  P callr                  3.7.3     2022-11-02 [?] RSPM (R 4.3.0)
#>    cd8ippred            * 0.1.0     2023-09-05 [1] local
#>  P cli                    3.6.1     2023-03-23 [?] RSPM (R 4.3.0)
#>  P codetools              0.2-19    2023-02-01 [?] CRAN (R 4.3.1)
#>  P colorspace             2.1-0     2023-01-23 [?] RSPM (R 4.3.0)
#>  P crayon                 1.5.2     2022-09-29 [?] RSPM (R 4.3.0)
#>  P DelayedArray           0.26.7    2023-07-28 [?] Bioconductor
#>  P desc                   1.4.2     2022-09-08 [?] RSPM (R 4.3.0)
#>  P DESeq2                 1.40.2    2023-06-23 [?] Bioconductor
#>  P devtools               2.4.5     2022-10-11 [?] RSPM (R 4.3.0)
#>  P digest                 0.6.33    2023-07-07 [?] RSPM (R 4.3.0)
#>  P dplyr                * 1.1.2     2023-04-20 [?] RSPM (R 4.3.0)
#>  P ellipsis               0.3.2     2021-04-29 [?] RSPM (R 4.3.0)
#>  P evaluate               0.21      2023-05-05 [?] RSPM (R 4.3.0)
#>  P fansi                  1.0.4     2023-01-22 [?] RSPM (R 4.3.0)
#>  P farver                 2.1.1     2022-07-06 [?] RSPM (R 4.3.0)
#>  P fastmap                1.1.1     2023-02-24 [?] RSPM (R 4.3.0)
#>  P foreach                1.5.2     2022-02-02 [?] RSPM (R 4.3.0)
#>  P fs                     1.6.3     2023-07-20 [?] RSPM (R 4.3.0)
#>  P generics               0.1.3     2022-07-05 [?] RSPM (R 4.3.0)
#>  P GenomeInfoDb           1.36.2    2023-08-25 [?] Bioconductor
#>  P GenomeInfoDbData       1.2.10    2023-08-31 [?] Bioconductor
#>  P GenomicRanges          1.52.0    2023-04-25 [?] Bioconductor
#>  P ggplot2              * 3.4.3     2023-08-14 [?] RSPM (R 4.3.0)
#>  P glmnet                 4.1-8     2023-08-22 [?] RSPM (R 4.3.0)
#>  P glue                   1.6.2     2022-02-24 [?] RSPM (R 4.3.0)
#>  P gtable                 0.3.4     2023-08-21 [?] RSPM (R 4.3.0)
#>  P highr                  0.10      2022-12-22 [?] RSPM (R 4.3.0)
#>  P htmltools              0.5.6     2023-08-10 [?] RSPM (R 4.3.0)
#>  P htmlwidgets            1.6.2     2023-03-17 [?] RSPM (R 4.3.0)
#>  P httpuv                 1.6.11    2023-05-11 [?] RSPM (R 4.3.0)
#>  P IRanges                2.34.1    2023-06-22 [?] Bioconductor
#>  P iterators              1.0.14    2022-02-05 [?] RSPM (R 4.3.0)
#>  P jquerylib              0.1.4     2021-04-26 [?] RSPM (R 4.3.0)
#>  P jsonlite               1.8.7     2023-06-29 [?] RSPM (R 4.3.0)
#>  P knitr                  1.43      2023-05-25 [?] RSPM (R 4.3.0)
#>  P labeling               0.4.3     2023-08-29 [?] RSPM (R 4.3.0)
#>  P later                  1.3.1     2023-05-02 [?] RSPM (R 4.3.0)
#>  P lattice                0.21-8    2023-04-05 [?] CRAN (R 4.3.1)
#>  P lifecycle              1.0.3     2022-10-07 [?] RSPM (R 4.3.0)
#>  P locfit                 1.5-9.8   2023-06-11 [?] RSPM (R 4.3.0)
#>  P magrittr               2.0.3     2022-03-30 [?] RSPM (R 4.3.0)
#>  P Matrix                 1.6-1     2023-08-14 [?] RSPM (R 4.3.0)
#>  P MatrixGenerics         1.12.3    2023-07-30 [?] Bioconductor
#>  P matrixStats            1.0.0     2023-06-02 [?] RSPM (R 4.3.0)
#>  P memoise                2.0.1     2021-11-26 [?] RSPM (R 4.3.0)
#>  P mime                   0.12      2021-09-28 [?] RSPM (R 4.3.0)
#>  P miniUI                 0.1.1.1   2018-05-18 [?] RSPM (R 4.3.0)
#>  P munsell                0.5.0     2018-06-12 [?] RSPM (R 4.3.0)
#>  P pillar                 1.9.0     2023-03-22 [?] RSPM (R 4.3.0)
#>  P pkgbuild               1.4.2     2023-06-26 [?] RSPM (R 4.3.0)
#>  P pkgconfig              2.0.3     2019-09-22 [?] RSPM (R 4.3.0)
#>  P pkgdown                2.0.7     2022-12-14 [?] RSPM (R 4.3.0)
#>  P pkgload                1.3.2.1   2023-07-08 [?] RSPM (R 4.3.0)
#>  P prettyunits            1.1.1     2020-01-24 [?] RSPM (R 4.3.0)
#>  P processx               3.8.2     2023-06-30 [?] RSPM (R 4.3.0)
#>  P profvis                0.3.8     2023-05-02 [?] RSPM (R 4.3.0)
#>  P promises               1.2.1     2023-08-10 [?] RSPM (R 4.3.0)
#>  P ps                     1.7.5     2023-04-18 [?] RSPM (R 4.3.0)
#>  P purrr                  1.0.2     2023-08-10 [?] RSPM (R 4.3.0)
#>  P R6                     2.5.1     2021-08-19 [?] RSPM (R 4.3.0)
#>  P ragg                   1.2.5     2023-01-12 [?] RSPM (R 4.3.0)
#>  P Rcpp                   1.0.11    2023-07-06 [?] RSPM (R 4.3.0)
#>  P RCurl                  1.98-1.12 2023-03-27 [?] RSPM (R 4.3.0)
#>  P remotes                2.4.2.1   2023-07-18 [?] RSPM (R 4.3.0)
#>    renv                   1.0.2     2023-08-15 [2] RSPM (R 4.3.0)
#>  P rlang                  1.1.1     2023-04-28 [?] RSPM (R 4.3.0)
#>  P rmarkdown              2.24      2023-08-14 [?] RSPM (R 4.3.0)
#>  P rprojroot              2.0.3     2022-04-02 [?] RSPM (R 4.3.0)
#>  P S4Arrays               1.0.5     2023-07-24 [?] Bioconductor
#>  P S4Vectors              0.38.1    2023-05-02 [?] Bioconductor
#>  P sass                   0.4.7     2023-07-15 [?] RSPM (R 4.3.0)
#>  P scales                 1.2.1     2022-08-20 [?] RSPM (R 4.3.0)
#>  P sessioninfo            1.2.2     2021-12-06 [?] RSPM (R 4.3.0)
#>  P shape                  1.4.6     2021-05-19 [?] RSPM (R 4.3.0)
#>  P shiny                  1.7.5     2023-08-12 [?] RSPM (R 4.3.0)
#>  P stringi                1.7.12    2023-01-11 [?] RSPM (R 4.3.0)
#>  P stringr                1.5.0     2022-12-02 [?] RSPM (R 4.3.0)
#>  P SummarizedExperiment   1.30.2    2023-06-06 [?] Bioconductor
#>  P survival               3.5-7     2023-08-14 [?] RSPM (R 4.3.0)
#>  P systemfonts            1.0.4     2022-02-11 [?] RSPM (R 4.3.0)
#>  P textshaping            0.3.6     2021-10-13 [?] RSPM (R 4.3.0)
#>  P tibble                 3.2.1     2023-03-20 [?] RSPM (R 4.3.0)
#>  P tidyr                * 1.3.0     2023-01-24 [?] RSPM (R 4.3.0)
#>  P tidyselect             1.2.0     2022-10-10 [?] RSPM (R 4.3.0)
#>  P urlchecker             1.0.1     2021-11-30 [?] RSPM (R 4.3.0)
#>  P usethis                2.2.2     2023-07-06 [?] RSPM (R 4.3.0)
#>  P utf8                   1.2.3     2023-01-31 [?] RSPM (R 4.3.0)
#>  P vctrs                  0.6.3     2023-06-14 [?] RSPM (R 4.3.0)
#>  P withr                  2.5.0     2022-03-03 [?] RSPM (R 4.3.0)
#>  P xfun                   0.40      2023-08-09 [?] RSPM (R 4.3.0)
#>  P xtable                 1.8-4     2019-04-21 [?] RSPM (R 4.3.0)
#>  P XVector                0.40.0    2023-04-25 [?] Bioconductor
#>  P yaml                   2.3.7     2023-01-23 [?] RSPM (R 4.3.0)
#>  P yardstick              1.2.0     2023-04-21 [?] RSPM (R 4.3.0)
#>  P zlibbioc               1.46.0    2023-04-25 [?] Bioconductor
#> 
#>  [1] /tmp/Rtmp4vwKTI/temp_libpath143d3eff14e6
#>  [2] /home/runner/.cache/R/renv/library/cd8ippred-05538736/R-4.3/x86_64-pc-linux-gnu
#>  [3] /home/runner/.cache/R/renv/sandbox/R-4.3/x86_64-pc-linux-gnu/5cd49154
#> 
#>  P ── Loaded and on-disk path mismatch.
#> 
#> ──────────────────────────────────────────────────────────────────────────────