library("allofus")
# Examples based on AoU snippets
aou_sql("
-- Compute the count of unique participants in our All of Us cohort.
SELECT
COUNT(DISTINCT person_id) AS total_number_of_participants
FROM
`{CDR}.person`
", collect = TRUE)
<- "hemoglobin"
MEASUREMENT_OF_INTEREST aou_sql('
-- Compute summary information for our measurements of interest for our cohort.
--
-- PARAMETERS:
-- MEASUREMENT_OF_INTEREST: a case-insensitive string, such as "hemoglobin", to be compared
-- to all measurement concept names to identify those of interest
WITH
--
-- Use a case insensitive string to search the measurement concept names of those
-- measurements we do have in the measurements table.
--
labs_of_interest AS (
SELECT
measurement_concept_id,
measurement_concept.concept_name AS measurement_name,
unit_concept_id,
unit_concept.concept_name AS unit_name
FROM
`{CDR}.measurement`
LEFT JOIN `{CDR}.concept` AS measurement_concept
ON measurement_concept.concept_id = measurement_concept_id
LEFT JOIN `{CDR}.concept` AS unit_concept
ON unit_concept.concept_id = unit_concept_id
WHERE
REGEXP_CONTAINS(measurement_concept.concept_name, r"(?i){MEASUREMENT_OF_INTEREST}")
GROUP BY
measurement_concept_id,
unit_concept_id,
measurement_concept.concept_name,
unit_concept.concept_name
)
--
-- Summarize the information about each measurement concept of interest that our
-- prior query identified.
--
SELECT
measurement_name AS measurement,
IFNULL(unit_name, "NA") AS unit,
COUNT(1) AS N,
COUNTIF(value_as_number IS NULL
AND (value_as_concept_id IS NULL
OR value_as_concept_id = 0)) AS missing,
MIN(value_as_number) AS min,
MAX(value_as_number) AS max,
AVG(value_as_number) AS avg,
STDDEV(value_as_number) AS stddev,
APPROX_QUANTILES(value_as_number, 4) AS quantiles,
COUNTIF(value_as_number IS NOT NULL) AS num_numeric_values,
COUNTIF(value_as_concept_id IS NOT NULL
AND value_as_concept_id != 0) AS num_concept_values,
COUNTIF(operator_concept_id IS NOT NULL) AS num_operators,
IF(src_id = "PPI/PM", "PPI", "EHR") AS measurement_source,
measurement_concept_id,
unit_concept_id
FROM
`{CDR}.measurement`
INNER JOIN
labs_of_interest USING(measurement_concept_id, unit_concept_id)
LEFT JOIN
`{CDR}.measurement_ext` USING(measurement_id)
GROUP BY
measurement_concept_id,
measurement_name,
measurement_source,
unit_concept_id,
unit_name
ORDER BY
N DESC
', collect = TRUE)
Execute a SQL query on the All of Us database
Description
Executes an SQL query on the All of Us database
Usage
aou_sql(
query,
collect = FALSE,
debug = FALSE,
...,
con = getOption("aou.default.con"),
CDR = getOption("aou.default.cdr")
)
Arguments
query
|
A SQL query (BigQuery dialect) to be executed. Interpreted with glue::glue() , so expressions enclosed with braces will be evaluated. References to “{CDR}” or “{cdr}” will be evaluated automatically (see examples).
|
collect
|
Whether to bring the resulting table into local memory (collect = TRUE ) as a dataframe or leave as a reference to a database table (for continued analysis using, e.g., dbplyr ). Defaults to FALSE.
|
debug
|
Print the query to the console; useful for debugging. |
…
|
All other arguments passed to bigrquery::bq_table_download() if collect = TRUE .
|
con
|
Connection to the allofus SQL database. Defaults to getOption(“aou.default.con”) , which is created automatically with aou_connect() . Only needed if collect = FALSE .
|
CDR
|
The name of the "curated data repository" that will be used in any references of the form “{CDR}” or “{cdr}” in the query (see examples). Defaults to getOption(“aou.default.cdr”) , which is Sys.getenv(‘WORKSPACE_CDR’) if not specified otherwise (i.e., the "mainline" CDR). On the controlled tier, specify the "base" CDR with CDR = paste0(Sys.getenv(‘WORKSPACE_CDR’), “_base”) .
|
Value
A dataframe if collect = TRUE
; a reference to a remote database table if not.