WIP: POC Redesign extraction #256

Open

wants to merge 100 commits into main
Conversation


@llrs-roche llrs-roche commented Mar 5, 2025

Linked to insightsengineering/NEST-roadmap#36

Pull Request

Redesign extraction

This branch/PR is a POC of a specification for the user input to be extracted.
See the added tests for how it works (currently most, if not all, of them should pass); a simple example of a specification:

spec <- c(datasets("df"), variables(where(is.factor)))

Features:

  • Specifications can be composed, which allows reusing them (see the sketch after this list).
  • Three different steps: specification, validation and extraction.
  • Supporting a new object class is as easy as adding a new extraction method (if the default doesn't work) so that user-supplied functions work on that class.
  • Possible to set defaults for the choices (see TODO list).
  • Simple to define and reuse if the user gives each specification a good name.
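
A minimal sketch of such composition, reusing one dataset specification for two inputs (the dataset and variable names are illustrative, using the constructors shown above):

# Reuse a dataset specification for two different variable inputs
base_adsl <- datasets("ADSL")
arm_spec <- c(base_adsl, variables("ARM"))
armcd_spec <- c(base_adsl, variables("ARMCD"))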

Examples

Resolve input to be used by module developer

The module_input_ui and module_input_server functions help the developer get the required data.

devtools::load_all(".")
library("shiny")
library("teal")
options(shiny.reactlog = TRUE)


# UI function for the custom histogram module
histogram_module_ui <- function(id) {
  ns <- shiny::NS(id)
  shiny::tagList(
    shiny::selectInput(
      ns("dataset"),
      "Select Dataset",
      choices = NULL
    ),
    shiny::selectInput(
      ns("variable"),
      "Select Variable",
      choices = NULL
    ),
    shiny::plotOutput(ns("histogram_plot")),
    shiny::verbatimTextOutput(ns("plot_code")) # To display the reactive plot code
  )
}

# Server function for the custom histogram module with injected variables in within()
histogram_module_server <- function(id, data, spec) {
  moduleServer(id, function(input, output, session) {
    # Update dataset choices based on available datasets in teal_data
    spec_resolved <- teal.transform::resolver(spec, data())

    shiny::updateSelectInput(
      session,
      "dataset",
      choices = spec_resolved$datasets$names,
      selected = spec_resolved$datasets$select
    )
    react_dataset <- reactive({
      req(input$dataset)
      req(input$dataset %in% spec_resolved$datasets$names)
      if (!input$dataset %in% spec_resolved$datasets$select) {
        spec_resolved |>
          update_spec("datasets", input$dataset) |>
          teal.transform::resolver(data())
      } else {
        spec_resolved
      }
    })

    react_variable <- reactive({
      req(input$variable, react_dataset())
      spec_resolved <- react_dataset()
      req(input$variable %in% spec_resolved$variables$names)
      if (!input$variable %in% spec_resolved$variables$select) {
        react_dataset() |>
          update_spec("variables", input$variable) |>
          teal.transform::resolver(data())
      } else {
        spec_resolved
      }
    })

    observe({
      spec_resolved <- req(react_dataset())
      shiny::updateSelectInput(
        session,
        "variable",
        choices = spec_resolved$variables$names,
        selected = spec_resolved$variables$select
      )
    })

    # Create a reactive `teal_data` object with the histogram plot
    result <- reactive({
      spec_resolved <- req(react_variable())
      # Create a new teal_data object with the histogram plot
      new_data <- within(
        data(),
        {
          my_plot <- hist(
            input_dataset[[input_vars]],
            las = 1,
            main = paste("Histogram of", input_vars),
            xlab = input_vars,
            col = "lightblue",
            border = "black"
          )
        },
        input_dataset = as.name(as.character(spec_resolved$datasets$select)), # Replace `input_dataset` with the selected dataset
        input_vars = as.character(spec_resolved$variables$select) # Replace `input_vars` with the selected variable
      )
      new_data
    })

    # Render the histogram from the updated teal_data object
    output$histogram_plot <- shiny::renderPlot({
      req(result())
      result()[["my_plot"]] # Access and render the plot stored in `new_data`
    })

    # Reactive expression to get the generated code for the plot
    output$plot_code <- shiny::renderText({
      teal.code::get_code(result()) # Retrieve and display the code for the updated `teal_data` object
    })
  })
}

# Custom histogram module creation
create_histogram_module <- function(label = "Histogram Module", spec) {

  teal::module(
    label = label,
    ui = histogram_module_ui,
    server = histogram_module_server,
    server_args = list(spec = spec),
    datanames = "all"
  )
}

# Initialize the teal app
app <- init(
  data = teal_data(IRIS = iris, MTCARS = mtcars),
  modules = modules(create_histogram_module(spec = c(datasets(where(is.data.frame)),
                                                     variables(where(is.numeric)))))
)

runApp(app)

Merge arbitrary data

The merge_module_srv accepts the inputs and merges arbitrary data processed by the module_input_server; this is done thanks to extract_input, which is responsible for obtaining the requested data from data().

devtools::load_all(".")
library("teal")
library("shiny")
library("dplyr")

# Define a custom module
mod <- function(label, x, y) {
  module(
    ui = function(id, x, y) {
      ns <- NS(id)
      div(
        module_input_ui(ns("x"), "x", x),
        module_input_ui(ns("y"), "y", y),
        shiny::tagList(
          shiny::textOutput(ns("text")),
          shiny::tableOutput(ns("table"))
        )
      )
    },
    server = function(id, data, x, y) {
      moduleServer(id, function(input, output, session) {
        x_in <- module_input_server("x", x, data)
        y_in <- module_input_server("y", y, data)

        merged_data <- reactive({
          req(x_in(), y_in())
          data_to_merge <- list(x_in(), y_in())
          m <- merge_call_multiple(data_to_merge, merge_function = "dplyr::full_join", data = data())
        })

        output$table <- shiny::renderTable({
          merged_data()[["ANL"]]
        })
      })
    },
    ui_args = list(x = x, y = y),
    server_args = list(x = x, y = y)
  )
}

data <- within(teal.data::teal_data(), {
  df1 <- data.frame(id = 1:10, var1 = letters[1:10], var2 = letters[1:10], var3 = 1:10)
  df2 <- data.frame(id = rep(1:10, 2), var2 = letters[1:20], var3 = factor(LETTERS[1:20]), var4 = LETTERS[1:20])
})

ids_df1 <- c("id", "var1", "var2")
ids_df2 <- c("id", "var2")

app <- init(
  data = data,
  modules = modules(
    mod(x = c(datasets("df1"), variables(ids_df1, ids_df1)),
        y = c(datasets("df2"), variables(ids_df2, ids_df2)))
  )
)

shinyApp(app$ui, app$server)
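
For orientation, the merged ANL in this example should be roughly equivalent to the following (a sketch assuming the columns shared between the two selections act as join keys):

ANL <- dplyr::full_join(
  df1[, c("id", "var1", "var2")],
  df2[, c("id", "var2")],
  by = c("id", "var2")
)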

MAE

devtools::load_all(".")
library("DFplyr")

tda <- within(teal.data::teal_data(), {
  library("MultiAssayExperiment")
  m <- diag(5)
  i <- iris
  data(miniACC, envir = environment())
  mae2 <- hermes::multi_assay_experiment
})


r <- resolver(
  c(
    datasets(where(function(x) is(x, "MultiAssayExperiment"))),
    mae_colData(where(is.numeric))
  ),
  tda
)

tda[[r[[1]]$select]] |>
  colData() |>
  select(r[[2]]$names)

TODO

  • Data extraction and preparation design.

  • UI update based on partial resolution.

  • Check it is simple for app developers.

  • Use tidyselect instead of own operators and resolution.

  • Update the API to not rely on operators from this package (rely only on those from tidyselect).

  • Test extraction on non data.frame objects: MAE objects/matrices, lists...

  • Check missing corners: test the required changes on current modules:
    tmg: tm_missing_data, tm_g_distribution, and tm_data_table;
    tmc: tm_a_mmrm and tm_t_events.
    Only up to the server body; check the defaults.
    Estimate: 8 hours per module, mmrm = 10 hours.

  • Verify value specification: Some modules request variable A vs other variables or a numeric input.

  • Test the data extraction and merging in the module (ANL).
  • With different data types: data.frame, matrices, MAE, SE.
    Estimate: 20 hours.
  • Verify bookmarking works

  • Verify it works on multiple data merges from multiple sources (cf. [Bug]: Forced merge precludes using modules without joining keys #258).

  • Check it is simple for module developers (less important).

  • UI validation: remove unnecessary input UI or gray it out when no input is possible (just one choice). Example: a selection that allows choosing data.frames that have column A; there is no need to select a variable.

  • Documentation/help

  • Check transition/notify dependencies/users

  • Update module packages to use that feature

    • teal.modules.general
    • teal.modules.clinical
    • Other
  • Set sensible defaults for specifications.
    What should the constructor have as a default for the extract spec (i.e. is it a data.frame? take all numeric columns? select only the first one, with all available options)?
    Each module may have different requirements.

@chlebowa
Contributor

chlebowa commented Mar 5, 2025

spec <- datasets("df") & variables(is.factor)

This would specify "factor columns from df", correct? Variables are specified with predicate functions?
Currently variable choices are specified with functions that act on the whole dataset, which allows for more flexibility, and which was deemed necessary in preliminary discussions. If one wants to use all columns whose names are in all caps, it cannot be done by applying predicates to columns.
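
For illustration (a hypothetical helper, not the package API), a chooser that receives the whole dataset can do this, whereas a per-column predicate cannot:

all_caps_columns <- function(data) {
  # Select columns whose names are unchanged by toupper(), i.e. all-caps names
  names(data)[names(data) == toupper(names(data))]
}
all_caps_columns(data.frame(A = 1, Ab = 2, B = 3))
#> [1] "A" "B"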

@llrs-roche
Author

llrs-roche commented Mar 5, 2025

This would specify "factor columns from df", correct?

Yes, I'm glad it is easy to understand.

Variables are specified with predicate functions? Currently variable choices are specified with functions that act on the whole dataset, which allows for more flexibility, and which was deemed necessary in preliminary discussions. If one wants to use all columns whose names are in all caps, it cannot be done by applying predicates to columns.

Not sure I fully understand your question. Variables can also be manually set, for example variables("A"). But filtering columns whose names are all in caps is currently possible:

td <- within(teal.data::teal_data(), {
  df <- data.frame(A = as.factor(LETTERS[1:5]), Ab = letters[1:5])
  m <- matrix()
})
spec <- datasets("df") & variables(function(x){x==toupper(x)})
resolver(spec, td) #no print method available yet, but basically resolves to df dataset and A variable

The only limitation is that the function provided should return a logical vector. In terms of implementation, the function is applied to the names and to the values, precisely to accommodate cases like this.
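
For illustration, a minimal sketch of how such a dual application could behave (a hypothetical helper, not the actual implementation):

select_variables <- function(df, predicate) {
  # Try the predicate on the column names and on the column values;
  # keep a column if either application returns TRUE.
  by_name <- vapply(names(df), function(nm) isTRUE(all(predicate(nm))), logical(1))
  by_value <- vapply(df, function(col) isTRUE(all(predicate(col))), logical(1))
  names(df)[by_name | by_value]
}
df <- data.frame(A = factor(letters[1:5]), Ab = letters[1:5])
select_variables(df, is.factor)                   # matches A by value
#> [1] "A"
select_variables(df, function(x) x == toupper(x)) # matches A by name
#> [1] "A"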

@chlebowa
Contributor

chlebowa commented Mar 5, 2025

spec <- datasets("df") & variables(function(x){x==toupper(x)})

I don't get this one. I would think it would return columns where the values are in all caps.

@chlebowa
Contributor

chlebowa commented Mar 5, 2025

In terms of implementation, the function is applied to the names and to the values, precisely to accommodate cases like this.

The same function is applied to column names and column values? 🤔

@llrs-roche
Author

spec <- datasets("df") & variables(function(x){x==toupper(x)})

I don't get this one. I would think it would return columns where the values are in all caps.

It could mean both things imho. But here it is specifying variables, not values. For filtering by value there is values() (still a work in progress).
Perhaps this other example is more representative of what I meant (I didn't realize the confusion when I reused an example with capital letters):

td <- within(teal.data::teal_data(), {
    df <- data.frame(A = as.factor(letters[1:5]), Ab = letters[1:5])
    m <- matrix()
})
spec <- datasets("df") & variables(function(x){x==toupper(x)})
resolver(spec, td)

It still selects A, as the name is in capital letters.

The same function is applied to column names and column values? 🤔

Yes; if there is a single argument it should accept is.factor and all_caps <- function(x){x == toupper(x)}, and also filter by names with functions like starts_with(). I hope this helps.

@chlebowa
Contributor

chlebowa commented Mar 5, 2025

Thanks, it does explain things a bit.

@llrs-roche
Author

llrs-roche commented Mar 7, 2025

I updated the branch to make it easy to reflect updates on selections from the possible choices. Besides some internal functions, it now exports a new key function, update_spec(), to check whether updated selections invalidate other selections (if the dataset changes, the variables should change too).
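
In isolation, the update pattern is roughly the following (a sketch based on the histogram example above, run inside a reactive context):

spec_resolved <- teal.transform::resolver(spec, data()) # initial resolution
spec_updated <- spec_resolved |>
  update_spec("datasets", input$dataset) |>   # the user picked another dataset
  teal.transform::resolver(data())            # re-resolve so dependent selections are revalidated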

Here is a full example with a shiny module and teal:

library("teal")
library("shiny")

# Define a custom module
mod <- function(label, x, y) {
  module(
    ui = function(id, x, y) {
      ns <- NS(id)
      div(
        module_input_ui(ns("x"), "x", x),
        module_input_ui(ns("y"), "y", y),
        shiny::tagList(
          shiny::textOutput(ns("text"))
        )
      )
    },
    server = function(id, data, x, y) {
      moduleServer(id, function(input, output, session) {
        x_in <- module_input_server("x", x, data)
        y_in <- module_input_server("y", y, data)

        output$text <- shiny::renderText({
          req(x_in(), y_in())
          l <- lapply(
            list(x = x_in, y = y_in),
            function(sel) {
              if (sum(lengths(sel()))) {
                paste0(
                  "Object: ", sel()$datasets, "\nVariables: ",
                  paste0(sel()$variables, collapse = ", ")
                )
              } else {
                "No info??"
              }
            }
          )
          unlist(l)
        })
      })
    },
    ui_args = list(x = x, y = y),
    server_args = list(x = x, y = y)
  )
}

Here I create three different specifications; you can pass them to the app and they will work:

iris_numeric <- datasets("IRIS") & variables(is.numeric)
mtcars_char <- datasets("MTCARS") & variables(is.character)
iris_numeric_or_mtcars_char <- iris_numeric | mtcars_char

app <- init(
  data = teal.data::teal_data(IRIS = iris, MTCARS = mtcars),
  modules = modules(
    mod(x = datasets("IRIS") & variables(is.factor),
        y = datasets("MTCARS") & variables(is.numeric))
  )
)

shinyApp(app$ui, app$server)

@llrs-roche
Author

I've been experimenting a bit and it works well for some simple cases:

env <- within(teal.data::teal_data(), {
  iris <- iris
  iris2 <- cbind(iris, A = 1)
  m <- diag(5)
  d <- cbind(m, B = 1)
})

However it easily fails when multiple inputs are acceptable:

env2 <- within(env, {
  iris2$a <- iris2$Species
})
# Get the levels of the factors:
sapply(env2, function(x) {
  if (is.data.frame(x) & any(sapply(x, is.factor))) {
    levels(x[, sapply(x, is.factor)])
  }
})

We would still need to ensure that the output is valid within some limits (only allow named logical values? only allow the names of data objects, variables, or values?).

When multiple options are valid in the previous step, evaluating further steps could create confusion about how to limit them to those selected by the user, or about the structure returned (a list of lists of lists). I am thinking, for example, of allowing selection of all the levels from variables that start with ARM, but where at most one variable can be selected.

This direct evaluation makes it a tad harder to compose the extraction. Say we want the variable ARM of ADSL and then, in a different input, ARMCD. With the current design we can reuse the dataset specification and append the variable specification. With a direct function, app developers will need to do that themselves:

adam <- within(teal.data::teal_data(), {
  penguins <- penguins 
  ADSL <- data.frame(ARM = 1, ARMCD = 2)
})

is_ADSL <- function(x) {
  x == "ADSL"
}

is_ARM <- function(x) {
  x == "ARM"
}

is_ARMCD <- function(x) {
  x == "ARMCD"
}

sapply(names(adam), function(x, env){
  if (is_ADSL(x)) {
    nam <- names(env[[x]])
    nam[is_ARM(nam)]
  }
}, env = adam)
#> $ADSL
#> [1] "ARM"
#> 
#> $penguins
#> NULL

sapply(names(adam), function(x, env){
  if (is_ADSL(x)) {
    nam <- names(env[[x]])
    nam[is_ARMCD(nam)]
  }
}, env = adam)
#> $ADSL
#> [1] "ARMCD"
#> 
#> $penguins
#> NULL

Created on 2025-04-23 with reprex v2.1.1
(Here I created helper functions because I expect this will be repeated for multiple inputs in a real app.)

We might still need to evaluate the function several times in case the data has changed due to user-updated selections, so the benefit of having a single eager function is diminished.

On the plus side, I like that this would make it a lot easier to support new/different classes (in the current implementation I think there is room for improvement in supporting them). But I don't think there will be requests/interest for many new classes (Bioconductor's emphasis on reusing classes will make it easy to support different classes that inherit from a single one).

But let's see the team's and @gogonzo's feedback.

@pawelru
Contributor

pawelru commented Apr 23, 2025

When it comes to readability, please consider the purrr::mapX() family of functions, which allows looping through multiple objects (of the same length). In our case that would be env and the names of its objects.

r$> purrr::map2_chr(.x = names(as.list(env)), .y = as.list(env), .f = \(name, value) `if`(is.data.frame(value) && "Species" %in% names(value), name, NA_character_))
[1] NA     "iris"

Then we would require a function of \(name, value) ..., which could be a little bit clearer and easier for the app developer.

We would still need to ensure that the output is valid within some limits (only allow named logical values? only allow the names of data objects, variables, or values?).

Yes. We can make a simple decorator with some "validation" on the returned value. That validation might need some additional information though.
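
For example, a minimal sketch of such a decorator (names and checks are illustrative):

with_validation <- function(selector, allowed_names) {
  function(...) {
    result <- selector(...)
    # Only accept character results that are among the known names
    stopifnot(is.character(result), all(result %in% allowed_names))
    result
  }
}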

We might still need to evaluate the function several times in case the data has changed due to user-updated selections, so the benefit of having a single eager function is diminished.

Definitely yes. Note the flow: filter panel -> encoding -> output. Encoding needs to be reactive to changes in the filter panel.
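
Roughly (a sketch reusing the resolver pattern from the PR description):

spec_resolved <- reactive({
  teal.transform::resolver(spec, data()) # re-runs whenever the filtered data() changes
})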

Say we want the variable ARM of ADSL and then, in a different input, ARMCD. With the current design we can reuse the dataset specification and append the variable specification.

I was thinking about this, as it might be one of the biggest challenges here. Recently I have been interacting more with configuration files, in which things are more explicit. How about enabling different logic depending on the dataset selected, via a simple case-when clause?

\(dataname, value) {
  case_when(
    dataname
    "ADSL" = <tidyselection for ADSL>
    "ADTTE" = ...
    ...
  )
}

@chlebowa
Contributor

When it comes to readability, please consider the purrr::mapX() family of functions, which allows looping through multiple objects (of the same length). In our case that would be env and the names of its objects.

mapply is quite enough for this purpose, IMHO. purrr is a can of worms best left unopened.
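
For comparison, the same selection with base mapply (equivalent to the purrr call above):

mapply(
  function(name, value) {
    if (is.data.frame(value) && "Species" %in% names(value)) name else NA_character_
  },
  names(as.list(env)),
  as.list(env)
)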

@llrs-roche
Author

Then we would require a function of (name, value) ..., which could be a little bit clearer and easier for the app developer.

One piece of feedback from Nina was whether there would be a sensible default for app developers. I guess if we go with this implementation it could be even simpler, as the app developer could pick the first of whatever makes sense for their modules.

Yes. We can make a simple decorator with some "validation" on the returned value. That validation might need some additional information though.

This validation is what led to multiple methods being required for new classes in the current design. Accessing and extracting is one thing, but then we need to make sure the values retrieved make sense.

How about enabling different logic depending on the dataset selected via simple case when clause?

Having a switch clause won't help reduce the length of the code when the input shown to the users is from the same dataset. Soon we will have requests to provide a template/default or to simplify that part of the API. Even if we didn't, these switch clauses would be repeated for multiple user inputs and could become too verbose imho.

PS: @chlebowa feel free to give some feedback about the design itself too. I'm not sure if you like the dual purpose of the functions provided in the current implementation, or whether you would prefer, for easy extension, having direct access to the data.

PS2: As the increment is about to finish, we'll need to decide soon where this is heading and what the next steps are.

Contributor

@gogonzo gogonzo left a comment

Removing the logical operators implemented on the type classes will simplify the code in multiple places. In the POC we can still use c() in the examples and the app wouldn't fail. The API should only be a replacement for the "explicit" class specification.

# make sure this works
c(
  datasets(is.data.frame, select = "df1"),
  variables(is.numeric)
)

# this is just an alternative syntax to above  
datasets(is.data.frame, select = "df1") & variables(is.numeric)

Additionally, we don't have a transform constructor and it is only possible to create a "transform" using c() or logical operators, which is the opposite of how the design should be implemented. The problem is that transform is already a base method and we can't create a function with a signature other than transform(_data, ...). Probably we need another name for this class.
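
For instance, a hypothetical explicit constructor could look like this (the name and signature are placeholders):

new_spec <- function(...) {
  structure(list(...), class = "transform")
}
# c() and `&` would then just be alternative syntax on top of it:
new_spec(datasets(is.data.frame, select = "df1"), variables(is.numeric))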

Comment on lines +1 to +11
helper_input <- function(id,
                         label,
                         multiple = FALSE) {
  shiny::selectInput(
    id,
    label,
    choices = NULL,
    selected = NULL,
    multiple = multiple
  )
}
Contributor

it is short enough to use it directly

R/types.R Outdated
"Invalid selection" = check_input(type)
)
if (is.function(x)) {
x <- list(x)
Contributor

why is it a list?


Author

I had some challenges combining different types when a single function is provided, and this was the simplest way to solve them that I found. If we go with direct access to the qenv with tidyselect helpers, we will need to replace that and capture the expression or something similar.

R/resolver.R Outdated

# Evaluate if the function applied to the data
# but we need to return the name of the data received
functions_data <- function(spec_criteria, names_data, data) {
Contributor

functions_names and functions_data could be replaced to handle tidyselect syntax. The implementation is pretty simple:

foo <- function(data, ...) {
  tidyselect::eval_select(rlang::expr(c(...)), data)
}

iris |> foo(starts_with("Sepal"))
# Sepal.Length  Sepal.Width 
#           1            2 

iris |>  foo(where(is.numeric))
# Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
#           1            2            3            4

The remaining question is how to implement eval_select for datasets or for other classes. Maybe we should have our own "eval_select" equivalent to handle multiple types (qenv, data.frame).
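
One possible direction, sketched with S3 dispatch (hypothetical names; the qenv is coerced with as.list() as in the example below):

eval_select_any <- function(x, ...) UseMethod("eval_select_any")
eval_select_any.data.frame <- function(x, ...) tidyselect::eval_select(rlang::expr(c(...)), x)
eval_select_any.list <- function(x, ...) tidyselect::eval_select(rlang::expr(c(...)), x)
eval_select_any.qenv <- function(x, ...) tidyselect::eval_select(rlang::expr(c(...)), as.list(x))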

Contributor

@gogonzo gogonzo commented Apr 24, 2025

This works for example:

teal.code::qenv() |> within({
  aa <- 1
  ab <- 2
  ba <- 3
  bb <- NULL
})  |> as.list() |> foo(starts_with("a") | where(is.null), -aa)
# ab bb 
#  4  2 

Author

This seems much cleaner than what I implemented 👍👍👍. The exploration for other classes is worth it I think, especially seeing that it already works with lists (which I thought wouldn't work). Extending to multiple classes might then be simpler.

@@ -0,0 +1,134 @@
#' @export
Ops.transform <- function(e1, e2) {
Contributor

After digging into the implementation I think the tidyselect-inspired syntax is a little bit off from the original. I think tidyselect makes sense for the names argument, e.g. datasets(names = starts_with(...)).
Tidyselect allows combining different "selection helpers" with logical operators, like datasets(starts_with("Sepal") | !ends_with("Width")). I find combining "type" objects (datasets, variables, ...) with logical operators complicated and confusing. For example, what would be the result of:

datasets(is.data.frame, select = "iris") & variables(starts_with("Sepal")) &
!datasets(is.data.frame, select = "mtcars") & variables(is.numeric)

I think it doesn't make sense to copy the tidyselect syntax and maintain the operators on our side. And I would stick to using operators for a single argument (names) rather than for the whole "transform" object. Maintaining Ops for the whole transform class might be (and already is) very complex.

Author

After digging into the implementation I think the tidyselect-inspired syntax is a little bit off from the original. I think tidyselect makes sense for the names argument, e.g. datasets(names = starts_with(...)).

So you wouldn't allow tidyselect syntax on the select = argument? And I take it you would rename x to names on the types 👍.

Tidyselect allows combining different "selection helpers" with logical operators, like datasets(starts_with("Sepal") | !ends_with("Width")). I find combining "type" objects (datasets, variables, ...) with logical operators complicated and confusing. For example, what would be the result of:

datasets(is.data.frame, select = "iris") & variables(starts_with("Sepal")) &
!datasets(is.data.frame, select = "mtcars") & variables(is.numeric)

The negation currently only takes into account the specification in the x argument (as you can see with (!datasets(is.data.frame, select = "mtcars"))), so this would theoretically result in an empty selection, as it would select all data.frames and then exclude them.
Given the precedence of !, it is only applied to the datasets, so it would be possible to select numeric variables and all those that start with Sepal from the selected data.frames (none in this case).
Currently there is no method for ! on the "transform" class, i.e. this returns an error: !(datasets(is.data.frame, select = "mtcars") & variables(is.numeric)).

datasets(is.data.frame, select = "iris") & variables(starts_with("Sepal")) &
!datasets(is.data.frame, select = "mtcars") & variables(is.numeric)

I think it doesn't make sense to copy the tidyselect syntax and maintain the operators on our side.

The problem with relying on tidyselect is that it only applies to data.frames; it won't work with other classes, not even matrices. If this is okay, and we apply some tricks as Pawel suggested above for other methods, I agree this is in principle less work for us.

And I would stick to using operators for a single argument (names) rather than for the whole "transform" object. Maintaining Ops for the whole transform class might be (and already is) very complex.

I agree that the operators are more complex than I initially thought. This is partly due to the operators working by class. If we keep them for the names argument and (only?) allow tidyselect helpers, then we don't even need to do that ourselves.

Author

@llrs-roche llrs-roche left a comment

You made me realize that tidyselect is easier and a good path forward 👍

Removing the logical operators implemented on the type classes will simplify the code in multiple places.

Will do that, as they will only be applied within the tidyselect environment.

In the POC we can still use c() in the examples and the app wouldn't fail. The API should only be a replacement for the "explicit" class specification.

# make sure this works
c(
  datasets(is.data.frame, select = "df1"),
  variables(is.numeric)
)

# this is just an alternative syntax to above  
datasets(is.data.frame, select = "df1") & variables(is.numeric)

Additionally, we don't have a transform constructor and it is only possible to create a "transform" using c() or logical operators, which is the opposite of how the design should be implemented.

Sorry, I first thought about the grammar and then about the implementation, and this is reflected in the code.

The problem is that transform is already a base method and we can't create a function with a signature other than transform(_data, ...). Probably we need another name for this class.

What would be a good name? Some ideas:

  • specification, but it can be technical and doesn't explain what it does;
  • selection, closer to dplyr::select, which might help users;
  • input, closer to shiny/web development.

Contributor

@chlebowa chlebowa left a comment

Just some minor comments b/c I got a commit notification.

Development

Successfully merging this pull request may close these issues.

Design data extract and data merge