This vignette demonstrates every major feature of OdysseusCharacterizationModule using the Eunomia synthetic OMOP CDM database.
Eunomia ships four built-in cohorts — Celecoxib (id = 1), Diclofenac (id = 2), GiBleed (id = 3), and NSAIDs (id = 4).
# Obtain connection details for the Eunomia synthetic CDM.
connectionDetails <- Eunomia::getEunomiaConnectionDetails()

# Populate the cohort table with Eunomia's built-in cohorts.
Eunomia::createCohorts(connectionDetails)
connection <- connect(connectionDetails)
Verify the cohort table:
# Count the rows of main.cohort per cohort definition, ordered by id.
# The SQL text is kept flush-left so the query string is unchanged.
cohortCounts <- querySql(
  connection = connection,
  sql = "
SELECT cohort_definition_id, COUNT(*) AS cnt
FROM main.cohort
GROUP BY cohort_definition_id
ORDER BY cohort_definition_id
"
)
cohortCounts
We will characterise the Celecoxib new-user cohort (id = 1) throughout this vignette.
The simplest case: one domain, start-date logic, aggregated.
# Plan: condition_occurrence only, using start-date logic. Every other base
# domain, cohort features, and concept-set features are switched off.
plan_cond <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, requesting aggregated
# output. The same schema serves as cohort, CDM, and vocabulary schema.
specs_cond <- singleNodeSetting(
  plan                     = plan_cond,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)
cat("Specs generated:", length(specs_cond), "\n")

# Run the specs on the open connection.
results_cond <- executeSpecs(
  connection, specs_cond,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Preview the first ten rows of the condition results stored under "1001".
# This statement was fused onto the same line as the next assignment, which
# does not parse; the two are separated here.
head(results_cond[["1001"]], 10)

# Plan: drug_exposure only (no `type` is supplied for it). Every other base
# domain, cohort features, and concept-set features are switched off.
plan_drug <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_drug <- singleNodeSetting(
  plan = plan_drug,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Run the specs on the open connection.
results_drug <- executeSpecs(
  connection, specs_drug,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
head(results_drug[["1001"]], 10)
Overlap logic checks whether the era period overlaps the analysis window, rather than simply checking the start date.
# Plan: condition_era only, using overlap logic. Every other base domain,
# cohort features, and concept-set features are switched off.
plan_era <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = TRUE, type = "overlap"),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_era <- singleNodeSetting(
  plan                     = plan_era,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Run the specs on the open connection.
results_era <- executeSpecs(
  connection, specs_era,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Preview the first ten rows of the condition-era results stored under "1001".
# This statement was fused onto the same line as the next assignment, which
# does not parse; the two are separated here.
head(results_era[["1001"]], 10)

# Plan: drug_era only, using overlap logic. Every other base domain, cohort
# features, and concept-set features are switched off.
plan_dera <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_dera <- singleNodeSetting(
  plan = plan_dera,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Run the specs on the open connection.
results_dera <- executeSpecs(
  connection, specs_dera,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Preview the first ten rows of the drug-era results stored under "1001".
# This statement was fused onto the same line as the next assignment, which
# does not parse; the two are separated here.
head(results_dera[["1001"]], 10)

# Plan: procedure_occurrence only, using start-date logic. Every other base
# domain, cohort features, and concept-set features are switched off.
plan_proc <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = TRUE, type = "start"),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_proc <- singleNodeSetting(
  plan = plan_proc,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Run the specs on the open connection.
results_proc <- executeSpecs(
  connection, specs_proc,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Preview the first ten rows of the procedure results stored under "1001".
# This statement was fused onto the same line as the next assignment, which
# does not parse; the two are separated here.
head(results_proc[["1001"]], 10)

# Plan: measurement only (no `type` is supplied for it). Every other base
# domain, cohort features, and concept-set features are switched off.
plan_meas <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_meas <- singleNodeSetting(
  plan = plan_meas,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Run the specs on the open connection.
results_meas <- executeSpecs(
  connection, specs_meas,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Preview the first ten rows of the measurement results stored under "1001".
# This statement was fused onto the same line as the next assignment, which
# does not parse; the two are separated here.
head(results_meas[["1001"]], 10)

# Plan: observation only (no `type` is supplied for it). Every other base
# domain, cohort features, and concept-set features are switched off.
plan_obs <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = TRUE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_obs <- singleNodeSetting(
  plan = plan_obs,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Run the specs on the open connection.
results_obs <- executeSpecs(
  connection, specs_obs,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Preview the observation results stored under "1001" (head()'s default row
# count). This statement was fused onto the same line as the next assignment,
# which does not parse; the two are separated here.
head(results_obs[["1001"]])

# Plan: visit_occurrence only, using overlap logic. Every other base domain,
# cohort features, and concept-set features are switched off.
plan_visit <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = TRUE, type = "overlap"),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_visit <- singleNodeSetting(
  plan = plan_visit,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Run the specs on the open connection.
results_visit <- executeSpecs(
  connection, specs_visit,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
head(results_visit[["1001"]])
Setting `aggregated = FALSE` returns one row per
patient-concept pair instead of summing across patients.
# Reuse the condition plan, but request patient-level (non-aggregated) output.
specs_raw <- singleNodeSetting(
  plan                     = plan_cond,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = FALSE
)

# Run the specs on the open connection.
results_raw <- executeSpecs(
  connection, specs_raw,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Report how many rows came back for result element "1001".
cat("Patient-level rows (window 1):", nrow(results_raw[["1001"]]), "\n")
head(results_raw[["1001"]], 10)
Enable several domains in a single plan for an integrated analysis.
# Plan: seven base domains enabled at once (observation and device_exposure
# stay off); cohort and concept-set features are switched off.
plan_multi <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = TRUE, type = "overlap"),
    drug_exposure        = list(include = TRUE),
    drug_era             = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = TRUE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = TRUE, type = "overlap"),
    measurement          = list(include = TRUE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_multi <- singleNodeSetting(
  plan                     = plan_multi,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)
cat("Total specs:", length(specs_multi), "\n")

# Run the specs on the open connection.
results_multi <- executeSpecs(
  connection, specs_multi,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# One summary row per result element: its name and its row count.
summary_df <- do.call(
  rbind,
  lapply(names(results_multi), function(analysis_name) {
    analysis_result <- results_multi[[analysis_name]]
    data.frame(
      analysis_id = analysis_name,
      rows = nrow(analysis_result),
      stringsAsFactors = FALSE
    )
  })
)
summary_df
Use pre-defined cohorts as binary covariates. Here we test whether Celecoxib patients also appear in the GiBleed and NSAIDs cohorts.
# Plan: no base-domain features; instead use the GiBleed (3) and NSAIDs (4)
# cohorts from main.cohort as covariates, with start-date logic.
plan_cohort <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include         = TRUE,
    type            = "start",
    cohortIds       = c(3L, 4L),
    cohortNames     = c("GiBleed", "NSAIDs"),
    cohortTable     = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_cohort <- singleNodeSetting(
  plan                     = plan_cohort,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)
cat("Cohort feature specs:", length(specs_cohort), "\n")

# Run the specs on the open connection.
results_cohort <- executeSpecs(
  connection, specs_cohort,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Show results for every cohort feature spec
for (nm in names(results_cohort)) {
  cat("\n--- Analysis", nm, "---\n")
  print(results_cohort[[nm]])
}

# The loop's closing brace was fused onto the same line as the assignment
# below, which does not parse; the two are separated here.

# Plan: no base-domain features; use the GiBleed cohort (3) from main.cohort
# as a covariate, this time with overlap logic.
plan_coh_ov <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "overlap",
    cohortIds = c(3L),
    cohortNames = c("GiBleed"),
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_coh_ov <- singleNodeSetting(
  plan = plan_coh_ov,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Print the `overlap` field of the first generated spec.
cat("Overlap flag:", specs_coh_ov[[1]]$overlap, "\n")

# Run the specs on the open connection.
results_coh_ov <- executeSpecs(
  connection, specs_coh_ov,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
for (nm in names(results_coh_ov)) {
cat("\n--- Analysis", nm, "---\n")
print(results_coh_ov[[nm]])
}
You can inspect the generated SQL without a database connection using
`renderSpecSql()` and `renderAllSpecSql()`.
# Render the SQL for the first condition spec; no target dialect is passed.
sql_default <- renderSpecSql(specs_cond[[1]])
# Banner printed ahead of the SQL preview.
cat("--- SQL Server (default) ---\n")
cat(substr(sql_default, 1, 500), "\n...\n")
Translate to other dialects:
for (dialect in c("postgresql", "redshift", "oracle", "spark")) {
cat("\n--- Dialect:", dialect, "---\n")
sql_translated <- renderSpecSql(specs_cond[[1]], targetDialect = dialect)
cat(substr(sql_translated, 1, 400), "\n...\n")
}
Batch rendering:
The number of specs scales linearly with the number of windows.
# Eight analysis windows: four ending the day before index (day -1) and four
# after it, given as paired start/end day offsets.
windows_8 <- defineAnalysisWindows(
  startDays = c(-365, -180, -90, -30,  1, 31,  91, 181),
  endDays   = c(  -1,   -1,  -1,  -1, 30, 90, 180, 365)
)

# Plan: condition_occurrence only (start-date logic) over the eight windows.
plan_8w <- planAnalysis(
  analysisWindows = windows_8,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_8w <- singleNodeSetting(
  plan                     = plan_8w,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)
cat("Specs with 8 windows:", length(specs_8w), "\n")

# Run the specs on the open connection.
results_8w <- executeSpecs(
  connection, specs_8w,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Row count per result element of the eight-window run.
data.frame(
  analysis_id = names(results_8w),
  rows = vapply(results_8w, nrow, integer(1))
)

# The closing parenthesis above was fused onto the same line as the
# assignment below, which does not parse; the two are separated here.

# Plan: four base domains (condition_occurrence, drug_exposure,
# procedure_occurrence, measurement) plus the GiBleed cohort (3) as a
# start-date cohort covariate.
plan_combined <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = TRUE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "start",
    cohortIds = c(3L),
    cohortNames = c("GiBleed"),
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Expand the plan into specs for the target cohort, aggregated output.
specs_combined <- singleNodeSetting(
  plan = plan_combined,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
cat("Total specs (4 base domains x 2 windows + 1 cohort x 2 windows):",
    length(specs_combined), "\n")

# Run the specs on the open connection.
results_combined <- executeSpecs(
  connection, specs_combined,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# One summary row per result element: name, whether it has any rows, and
# the row count.
summary_combined <- do.call(rbind, lapply(names(results_combined), function(nm) {
  df <- results_combined[[nm]]
  data.frame(
    analysis_id = nm,
    source = if (nrow(df) > 0) "data" else "empty",
    rows = nrow(df),
    stringsAsFactors = FALSE
  )
}))
summary_combined
All examples so far used Celecoxib (id = 1). Switching to a different
cohort is as simple as changing `cohortId`.
# Reuse the condition plan for cohort id 2 (Diclofenac), aggregated output.
specs_diclo <- singleNodeSetting(
  plan                     = plan_cond,
  cohortId                 = 2L,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Run the specs on the open connection.
results_diclo <- executeSpecs(
  connection, specs_diclo,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Heading printed ahead of the result preview.
cat("Diclofenac condition covariates (pre-index):\n")
head(results_diclo[["1001"]], 10)
stopOnError = FALSE
When executing many specs, you can continue past failures.