The omophub package provides an R interface to the OMOPHub API for accessing OHDSI ATHENA standardized medical vocabularies. This vignette demonstrates basic usage patterns.
Install from GitHub:
The package requires an API key from OMOPHub.
Set your API key as an environment variable:
Sys.setenv(OMOPHUB_API_KEY = "your_api_key_here")
Or add it to your .Renviron file for persistence:
OMOPHUB_API_KEY=your_api_key_here
library(omophub)

# Create client (reads API key from environment)
client <- OMOPHubClient$new()

# Or provide API key explicitly
client <- OMOPHubClient$new(api_key = "your_api_key")

# With additional options
client <- OMOPHubClient$new(
  api_key = "your_api_key",
  timeout = 30,
  max_retries = 3,
  vocab_version = "2025.1"
)
Retrieve a concept by its OMOP concept ID:
Get a concept by vocabulary-specific code:
Retrieve multiple concepts in a single request:
Basic search:
# Keyword search; print the ID and name of every concept returned
results <- client$search$basic("diabetes mellitus", page_size = 10)
for (concept in results$data) {
  cat(sprintf("%s: %s\n", concept$concept_id, concept$concept_name))
}
Search with filters:
Search using natural language queries powered by neural embeddings:
# Natural language search - understands clinical intent
results <- client$search$semantic("high blood sugar levels")

# Print each match with its similarity score, formatted to two decimals
for (r in results$data$results) {
  cat(sprintf("%s (similarity: %.2f)\n", r$concept_name, r$similarity_score))
}
Filter semantic search results:
# Narrow the semantic search by vocabulary and domain.
# NOTE(review): `threshold` is presumably a minimum similarity score - confirm
# against the API documentation.
results <- client$search$semantic(
  "heart attack",
  vocabulary_ids = "SNOMED",
  domain_ids = "Condition",
  threshold = 0.5
)
Fetch all semantic search results with automatic pagination:
Find concepts similar to a reference concept:
# Find concepts similar to Type 2 diabetes mellitus
similar <- client$search$similar(concept_id = 201826)

# Print each similar concept with its score, formatted to two decimals
for (s in similar$similar_concepts) {
  cat(sprintf("%s (score: %.2f)\n", s$concept_name, s$similarity_score))
}
Search by natural language query with different algorithms:
# Semantic similarity (neural embeddings)
similar <- client$search$similar(
  query = "high blood pressure",
  algorithm = "semantic"
)

# Lexical similarity (string matching)
similar <- client$search$similar(
  query = "high blood pressure",
  algorithm = "lexical"
)

# Hybrid (combined - default)
similar <- client$search$similar(
  query = "high blood pressure",
  algorithm = "hybrid",
  include_scores = TRUE,
  include_explanations = TRUE
)
Search for multiple queries in a single API call — much faster than individual requests when you have many terms to look up.
Execute up to 50 keyword searches at once:
# Three keyword searches in one request; `defaults` is shared by all of them
results <- client$search$bulk_basic(list(
  list(search_id = "q1", query = "diabetes mellitus"),
  list(search_id = "q2", query = "hypertension"),
  list(search_id = "q3", query = "aspirin")
), defaults = list(vocabulary_ids = list("SNOMED"), page_size = 5))

# Each result is matched by search_id
for (item in results$results) {
  cat(sprintf("%s: %d results\n", item$search_id, length(item$results)))
}
Execute up to 25 natural-language searches using neural embeddings:
# Two natural-language searches in one request; `defaults` is shared by both
results <- client$search$bulk_semantic(list(
  list(search_id = "s1", query = "heart failure treatment options"),
  list(search_id = "s2", query = "type 2 diabetes medication")
), defaults = list(threshold = 0.5, page_size = 10))

for (item in results$results) {
  # Use the reported count when present, otherwise count the returned results.
  # An explicit NULL check is used instead of `%||%`, which base R only
  # provides from version 4.4 onwards.
  n_results <- item$result_count
  if (is.null(n_results)) {
    n_results <- length(item$results)
  }
  cat(sprintf("%s: %d results\n", item$search_id, n_results))
}
Defaults apply to all searches; per-search values override them:
# Different domains per query, shared vocabulary filter
results <- client$search$bulk_basic(list(
  list(search_id = "cond", query = "diabetes", domain_ids = list("Condition")),
  list(search_id = "drug", query = "metformin", domain_ids = list("Drug"))
), defaults = list(vocabulary_ids = list("SNOMED", "RxNorm"), page_size = 5))
Get suggestions for autocomplete:
# Page through basic search results using the metadata returned with each page

# First page
results <- client$search$basic("diabetes", page = 1, page_size = 50)

# Check pagination info
print(results$meta$total_items)
print(results$meta$has_next)

# Get next page if available
# isTRUE() keeps the condition FALSE when has_next is NULL or NA
if (isTRUE(results$meta$has_next)) {
  page2 <- client$search$basic("diabetes", page = 2, page_size = 50)
}
Find how a concept maps to other vocabularies:
# Look up mappings for concept 201826 and print each target vocabulary and name
result <- client$mappings$get(201826)
for (mapping in result$mappings) {
  cat(sprintf(
    "%s: %s\n",
    mapping$target_vocabulary_id,
    mapping$target_concept_name
  ))
}
Map to specific vocabularies:
Use tryCatch to handle errors: