#> ── Attaching core tidyverse packages ──────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr     1.1.4     ✔ readr     2.1.5
#> ✔ forcats   1.0.0     ✔ stringr   1.5.1
#> ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
#> ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
#> ✔ purrr     1.0.2     
#> ── Conflicts ────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Introduction

Distributing samples to wells plates for experimental procedures is a very common task. In the following we use a data set of longitudinal subject samples that are to be spread across several n-well plates, balanced for treatment group and time point longitudinal_subject_samples.

data("longitudinal_subject_samples")
head(longitudinal_subject_samples)
#> # A tibble: 6 × 9
#>   SampleID SampleType SubjectID Group  Week Sex     Age   BMI SamplesPerSubject
#>   <chr>    <fct>      <chr>     <chr> <dbl> <chr> <dbl> <dbl>             <dbl>
#> 1 P01W1    Sample     P01       1         1 F        71  28.1                 7
#> 2 P01W4    Sample     P01       1         4 F        71  28.1                 7
#> 3 P01W6    Sample     P01       1         6 F        71  28.1                 7
#> 4 P01W10   Sample     P01       1        10 F        71  28.1                 7
#> 5 P01W14   Sample     P01       1        14 F        71  28.1                 7
#> 6 P01W18   Sample     P01       1        18 F        71  28.1                 7

In the fist example we’ll use a subset of samples to demonstrate the process.

  • We have 33 subject with each 2 samples.
  • Factors we want to balance are treatment group and Sex.
dat <- longitudinal_subject_samples |>
  filter(Group %in% 1:5, Week %in% c(1, 4)) |>
  select(SampleID, SubjectID, Group, Sex, Week)

# for simplicity: remove two subjects that don't have both visits
dat <- dat |>
  group_by(SubjectID) |>
  filter(n() == 2) |>
  ungroup()

Layout optimization in one go

For placing samples on plates and optimizing across plate distribution of factors as well as the within plate spacial distribution, the multi_plate_wrapper() function can be used. It focuses first on assigning samples to plates and then optimizes the layout within plates.

To place all 66 samples on 24-well plates we create a batch_container with 3 plates

We do the initial assignment of sample to plates and plot it

set.seed(42)

bc <- BatchContainer$new(
  dimensions = list("plate" = 3, "row" = 4, "col" = 6),
)

bc <- assign_in_order(bc, dat)

head(bc$get_samples()) |> gt::gt()
plate row col SampleID SubjectID Group Sex Week
1 1 1 P01W1 P01 1 F 1
1 1 2 P01W4 P01 1 F 4
1 1 3 P02W1 P02 1 M 1
1 1 4 P02W4 P02 1 M 4
1 1 5 P03W1 P03 1 M 1
1 1 6 P03W4 P03 1 M 4

We can view the initial assignment with plot_plate

cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Group,
      title = "Initial layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Sex,
      title = "Initial layout by Sex"
    )
  ),
  nrow = 2
)

For optimization optimize_multi_plate_design() iteratively calls optimize_design() for different steps of the experiment. For across plate optimization osat scoring is used. For within plate optimization spatial scoreing is used. The order of the factors indicate their relative importance. In this case we prioritize Group over Sex.

bc <- optimize_multi_plate_design(bc,
  across_plates_variables = c("Group", "Sex"),
  within_plate_variables = c("Group"),
  plate = "plate",
  row = "row",
  column = "col",
  n_shuffle = 2,
  max_iter = 700,
  quiet = TRUE
)
#> Warning in osat_score(bc, batch_vars = batch_vars, feature_vars = feature_vars,
#> : NAs in features / batch columns; they will be excluded from scoring

#> Warning in osat_score(bc, batch_vars = batch_vars, feature_vars = feature_vars,
#> : NAs in features / batch columns; they will be excluded from scoring
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Group,
      title = "Initial layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Sex,
      title = "Initial layout by Sex"
    )
  ),
  nrow = 2
)

We can look at the trace objects for each internal optimize_design run, returned from the wrapper function.

bc$scores_table() |>
  ggplot(aes(step, value, color = score)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ optimization_index, scales = "free_y")
#> Warning: Removed 6309 rows containing missing values or values outside the scale
#> range (`geom_line()`).
#> Warning: Removed 6309 rows containing missing values or values outside the scale
#> range (`geom_point()`).

Plate scoring

Note that internally the wrapper function sets up plate specific scoring functions that could manually be set up in the following way.

scoring_f <- c(
  Group = mk_plate_scoring_functions(bc,
    plate = "plate", row = "row", column = "col",
    group = "Group", penalize_lines = "hard"
  ),
  Sex = mk_plate_scoring_functions(bc,
    plate = "plate", row = "row", column = "col",
    group = "Sex", penalize_lines = "hard"
  )
)

For more information on customized plate scoring see vignette Plate scoring examples.

Two step approach

Sometimes layout requests can be more complicated. Assume we want to keep the two samples of a subject on the same 24 well plate.

Now we need to customize across plate optimization more so we need to split the process into two steps.

Step 1: Subjects to plates

There are 31 subjects with each 2 time points, i.e. we need ~ 11 subjects per plate and want to balance by treatment, sex.

First we create a batch container with 3 batches that each fit 11 subjects i.e. have 11 virtual locations.

For layout scoring we use OSAT score on Group and Sex variables.

Then we assign the samples randomly to the batches and look at their initial distribution.

set.seed(17) # gives `bad` random assignment

bc <- BatchContainer$new(
  dimensions = list("batch" = 3, "location" = 11)
)

scoring_f <- list(
  group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"),
  sex = osat_score_generator(batch_vars = "batch", feature_vars = "Sex")
)

bc <- assign_random(
  bc,
  dat |> select(SubjectID, Group, Sex) |> distinct()
)

bc$get_samples() |>
  head() |>
  gt::gt()
batch location SubjectID Group Sex
1 1 NA NA NA
1 2 P32 5 M
1 3 P10 3 F
1 4 P26 3 M
1 5 P17 5 M
1 6 P07 2 F
cowplot::plot_grid(
  plotlist = list(
    bc$get_samples() |> ggplot(aes(x = batch, fill = Group)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |> ggplot(aes(x = batch, fill = Sex)) +
      geom_bar() +
      labs(y = "subject count")
  ),
  nrow = 1
)

Optimizing the layout with optimize_design()

bc <- optimize_design(
  bc,
  scoring = scoring_f,
  n_shuffle = 1,
  acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.01),
  max_iter = 150,
  quiet = TRUE
)
#> Warning in osat_score(bc, batch_vars = batch_vars, feature_vars = feature_vars,
#> : NAs in features / batch columns; they will be excluded from scoring

#> Warning in osat_score(bc, batch_vars = batch_vars, feature_vars = feature_vars,
#> : NAs in features / batch columns; they will be excluded from scoring

After optimization the group and sex of samples are equally distributed across all plates. The lower right panel shows the optimization trace of the scores.

cowplot::plot_grid(
  plotlist = list(
    bc$get_samples() |> ggplot(aes(x = batch, fill = Group)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |> ggplot(aes(x = batch, fill = Sex)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$plot_trace(include_aggregated = TRUE)
  ),
  ncol = 3
)

Step 2: Within plate sample distribution

Using the result from step 1 we now optimize the layout within plates. For this we still need to add empty wells to each batch and assign the pre-allocated sample sheet in the right way to the new batch container.

dat <- dat |>
  left_join(bc$get_samples() |>
    select(SubjectID, batch))
#> Joining with `by = join_by(SubjectID)`

# add empty wells depending on how full the batch plate is
dat <- dat |>
  bind_rows(data.frame(
    SubjectID = "empty",
    SampleID = paste("empty", 1:(3 * 24 - nrow(dat))),
    batch = rep(1:3, 24 - (dat |> count(batch) |> pull(n)))
  ))

bc <- BatchContainer$new(
  dimensions = list("plate" = 3, "row" = 4, "col" = 6)
)

# initial assignment such that the original plate assigned stays the same
bc <- assign_in_order(
  bc,
  dat |> arrange(batch)
)


cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Group,
      title = "Initial layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Sex,
      title = "Initial layout by Sex"
    )
  ),
  nrow = 2
)

As we have already assigned samples to plates, the across plate optimization can be skipped in the wrapper. For distributing samples within each plate, we use variables Group and Sex again. The order of the factors indicate their relative importance.

bc <- optimize_multi_plate_design(bc,
  within_plate_variables = c("Group", "Sex"),
  plate = "plate",
  row = "row",
  column = "col",
  n_shuffle = 2,
  max_iter = 1000,
  quiet = TRUE
)
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Group,
      title = "Final layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Sex,
      title = "Final layout by Sex"
    )
  ),
  nrow = 2
)

bc$scores_table() |>
  ggplot(aes(step, value, color = score)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ optimization_index)

Full dataset

In the following we use the full data set of longitudinal subject samples that are to be spread across several n-well plates, balanced for treatment group and time point longitudinal_subject_samples.

In addition to the normal samples there are also controls to be placed on each plate. These are added after the sample batching step.

For accommodation of samples to plates there are the following control samples available

longitudinal_subject_samples |>
  filter(SampleType != "Sample") |>
  count(SampleType, Group) |>
  gt::gt()
SampleType Group n
Control Pool 6
Standard SpikeIn 15

Step 1: Batching

Again we want to keep all samples of a subject on the same plate. A first step could be grouping subjects into 3 batches blocking by treatment, sex and age. There are 34 subjects with each 3 - 8 time points, i.e. we need ~ 11 subjects per plate.

We first create a ‘subjects’ dataset.

# get subject data for batching
subjects <- longitudinal_subject_samples |>
  filter(SampleType == "Sample") |>
  count(SubjectID, Group, Sex, Age, name = "nTimePoints") |>
  distinct()

subjects |>
  select(-nTimePoints) |>
  slice(1:5) |>
  gt::gt() |>
  gt::tab_options()
SubjectID Group Sex Age
P01 1 F 71
P02 1 M 74
P03 1 M 76
P04 1 F 83
P05 2 M 79

Then we create a batch container for the samples with 3 batches called plate that each fit 11 subjects i.e. have 11 virtual locations.

For layout scoring we use OSAT score on Group and Sex variables. We initially assign the samples randomly to the batches and check the layout.

set.seed(42)

bc <- BatchContainer$new(
  dimensions = list("plate" = 3, "locations" = 11)
)

scoring_f <- list(
  group = osat_score_generator(batch_vars = "plate", feature_vars = c("Group")),
  sex = osat_score_generator(batch_vars = "plate", feature_vars = "Sex")
)

bc <- assign_random(bc, subjects)
cowplot::plot_grid(
  plotlist = list(
    bc$get_samples() |> ggplot(aes(x = plate, fill = Group)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |> ggplot(aes(x = plate, fill = Sex)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |> ggplot(aes(x = factor(plate), y = Age)) +
      geom_boxplot() +
      geom_point()
  ),
  nrow = 1
)

Optimizing the layout with optimize_design()

bc <- optimize_design(
  bc,
  scoring = scoring_f,
  n_shuffle = 1,
  acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
  max_iter = 150,
  quiet = TRUE
)

After optimization the group and sex of samples are equally distributed across all plates. The lower right panel shows the optimization trace of the scores.

cowplot::plot_grid(
  plotlist = list(
    bc$get_samples() |> ggplot(aes(x = plate, fill = Group)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |> ggplot(aes(x = plate, fill = Sex)) +
      geom_bar() +
      labs(y = "subject count"),
    bc$get_samples() |> ggplot(aes(x = factor(plate), y = Age)) +
      geom_boxplot() +
      geom_point(),
    bc$plot_trace(include_aggregated = TRUE)
  ),
  nrow = 2
)

Step 2: Within plate sample distribution

We start here by creating the batch container for all samples and making an initial assignment. Note there will be empty positions on the plates which we have to add before we assign the samples to the batch container in order.

samples_with_plate <- longitudinal_subject_samples |>
  left_join(bc$get_samples() |>
    select(-locations)) |>
  mutate(plate = ifelse(SampleType == "Sample", plate, str_extract(SampleID, ".$")))
#> Joining with `by = join_by(SubjectID, Group, Sex, Age)`

# not all plates have same amount of samples
samples_with_plate |> count(plate)
#> # A tibble: 3 × 2
#>   plate     n
#>   <chr> <int>
#> 1 1        77
#> 2 2        80
#> 3 3        73

# add empty wells depending on how full the batch plate is
# column 11 and 12 are left empty: 96 - 16 = 80 samples per plate
samples_with_plate <- samples_with_plate |>
  bind_rows(data.frame(
    SubjectID = "empty",
    SampleID = paste("empty", 1:(3 * 80 - nrow(samples_with_plate))),
    plate = rep(1:3, 80 - (samples_with_plate |> count(plate) |> pull(n))) |>
      as.character()
  ))


# new batch container for step 2
bc <- BatchContainer$new(
  dimensions = list(plate = 3, row = 8, col = 12),
  exclude = crossing(plate = 1:3, row = 1:8, col = 11:12)
)

# assign samples in order of plate
bc <- assign_in_order(
  bc,
  samples_with_plate |>
    arrange(plate) |>
    rename(orig_plate = plate)
)

# check if plate assignment is still correct
bc$get_samples() |>
  summarize(all(plate == orig_plate)) |>
  unlist()
#> all(plate == orig_plate) 
#>                     TRUE
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Group,
      title = "Initial layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = SubjectID,
      title = "Initial layout by SubjectID"
    ) +
      theme(legend.key.size = unit(.25, "cm")),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Sex,
      title = "Initial layout by Sex"
    )
  ),
  nrow = 3
)

As we have already assigned samples to plates, the across plate optimization can be skipped in the wrapper. For distributing samples within each plate, we use variables Group and Sex again. The order of the factors indicate their relative importance.

bc <- optimize_multi_plate_design(bc,
  within_plate_variables = c("Group", "SubjectID", "Sex"),
  plate = "plate",
  row = "row",
  column = "col",
  n_shuffle = 2,
  max_iter = 1000,
  quiet = TRUE
)
cowplot::plot_grid(
  plotlist = list(
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Group,
      title = "Final layout by Group"
    ),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color =
        SubjectID, title = "Final layout by SubjectID"
    ) +
      theme(legend.key.size = unit(.25, "cm")),
    plot_plate(bc,
      plate = plate, row = row, column = col, .color = Sex,
      title = "Final layout by Sex"
    )
  ),
  nrow = 3
)