% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/unicefData.R, R/zzz_aliases.R
\name{unicefData}
\alias{unicefData}
\alias{unicefdata}
\title{Fetch UNICEF SDMX data or structure}
\usage{
unicefData(
  indicator = NULL,
  dataflow = NULL,
  countries = NULL,
  year = NULL,
  sex = "_T",
  totals = FALSE,
  age = NULL,
  wealth = NULL,
  residence = NULL,
  maternal_edu = NULL,
  tidy = TRUE,
  include_label_columns = FALSE,
  country_names = TRUE,
  max_retries = 3,
  cache = FALSE,
  page_size = 1e+05,
  detail = c("data", "structure"),
  version = NULL,
  labels = "id",
  metadata = "light",
  format = c("long", "wide", "wide_indicators", "wide_attributes", "wide_sex",
    "wide_age", "wide_wealth", "wide_residence", "wide_maternal_edu"),
  pivot = NULL,
  latest = FALSE,
  circa = FALSE,
  add_metadata = NULL,
  dropna = FALSE,
  simplify = FALSE,
  mrv = NULL,
  raw = FALSE,
  ignore_duplicates = FALSE
)

unicefdata(
  indicator = NULL,
  dataflow = NULL,
  countries = NULL,
  year = NULL,
  sex = "_T",
  totals = FALSE,
  age = NULL,
  wealth = NULL,
  residence = NULL,
  maternal_edu = NULL,
  tidy = TRUE,
  include_label_columns = FALSE,
  country_names = TRUE,
  max_retries = 3,
  cache = FALSE,
  page_size = 1e+05,
  detail = c("data", "structure"),
  version = NULL,
  labels = "id",
  metadata = "light",
  format = c("long", "wide", "wide_indicators", "wide_attributes", "wide_sex",
    "wide_age", "wide_wealth", "wide_residence", "wide_maternal_edu"),
  pivot = NULL,
  latest = FALSE,
  circa = FALSE,
  add_metadata = NULL,
  dropna = FALSE,
  simplify = FALSE,
  mrv = NULL,
  raw = FALSE,
  ignore_duplicates = FALSE
)
}
\arguments{
\item{indicator}{Character vector of indicator codes (e.g., "CME_MRY0T4").}

\item{dataflow}{Character vector of dataflow IDs (e.g., "CME", "NUTRITION").}

\item{countries}{Character vector of ISO3 country codes (e.g., c("ALB", "USA")).
If NULL (default), fetches all countries.}

\item{year}{Year specification. Supports multiple formats:
\itemize{
\item NULL: All available years (default)
\item Single integer: Just that year (e.g., 2020)
\item String with colon: Range (e.g., "2015:2023")
\item String with comma: Non-contiguous years (e.g., "2015,2018,2020")
\item Integer vector: Explicit list of years (e.g., c(2015, 2018, 2020))
}}

\item{sex}{Sex disaggregation: "_T" (total, default), "F" (female), "M" (male).}

\item{totals}{Logical; if FALSE (default), excludes observations with _T (total) codes in dimension values,
matching Python/Stata behavior. Set to TRUE to include totals.}

\item{age}{Filter by age group. Default is NULL (keeps totals).}

\item{wealth}{Filter by wealth quintile. Default is NULL (keeps totals).}

\item{residence}{Filter by residence (e.g. "URBAN", "RURAL"). Default is NULL (keeps totals).}

\item{maternal_edu}{Filter by maternal education. Default is NULL (keeps totals).}

\item{tidy}{Logical; if TRUE (default), returns cleaned tibble with standardized column names.}

\item{include_label_columns}{Logical; if FALSE (default), drops the human-readable label-expansion columns that SDMX adds when \code{labels = "both"}, producing a codes-only schema that is consistent across R, Python, and Stata.}

\item{country_names}{Logical; if TRUE (default), adds country name column.}

\item{max_retries}{Number of retry attempts on failure (default: 3).
Previously called 'retry'. The usage shown above lists only \code{max_retries},
so pass the value under that name.}

\item{cache}{Logical; if TRUE, memoises results.}

\item{page_size}{Integer rows per page (default: 100000).}

\item{detail}{"data" (default) or "structure" for metadata.}

\item{version}{Optional SDMX version; if NULL, auto-detected.}

\item{labels}{Label format for SDMX requests: "id" (codes only, default),
"name" (labels only), or "both" (codes and labels).}

\item{metadata}{Metadata detail level: "light" (default) or "full".}

\item{format}{Output format: "long" (default), "wide" (years as columns),
"wide_indicators" (indicators as columns), "wide_attributes", or wide by dimension:
"wide_sex", "wide_age", "wide_wealth", "wide_residence", "wide_maternal_edu".}

\item{pivot}{Character vector of column(s) to pivot to wide format.
Alternative to format parameter for custom pivoting.}

\item{latest}{Logical; if TRUE, keep only the most recent non-missing value per country.
The year may differ by country. Useful for cross-sectional analysis.}

\item{circa}{Logical; if TRUE, for each specified year find the closest available
data point. When exact years aren't available, returns observations with periods
closest to the requested year(s). Different countries may have different actual
years. Only applies when specific years are requested.}

\item{add_metadata}{Character vector of metadata to add: "region", "income_group",
"continent", "indicator_name", "indicator_category".}

\item{dropna}{Logical; if TRUE, remove rows with missing values.}

\item{simplify}{Logical; if TRUE, keep only essential columns.}

\item{mrv}{Integer; keep only the N most recent values per country (Most Recent Values).}

\item{raw}{Logical; if TRUE, return raw SDMX data without column standardization.
Default is FALSE (clean, standardized output matching Python package).}

\item{ignore_duplicates}{Logical; if FALSE (default), raises an error when exact
duplicate rows are found (all column values identical). Set to TRUE to allow
automatic removal of duplicates.}
}
\value{
A tibble with indicator data, or an \code{xml_document} if \code{detail = "structure"}.
The \code{period} column contains decimal years (see the Time Period Handling section).
}
\description{
Download UNICEF indicator data from the SDMX data warehouse.
Supports automatic paging, retrying on transient failure, memoisation, and tidy-up.

This function uses unified parameter names consistent with the Python package.
}
\section{Time Period Handling}{

The UNICEF SDMX API returns TIME_PERIOD values in various formats (annual "2020"
or monthly "2020-03"). This function automatically converts monthly periods to
decimal years for consistent time-series analysis:
\itemize{
\item "2020" becomes 2020.0 (integer year)
\item "2020-01" becomes 2020.0833 (2020 + 1/12, January)
\item "2020-06" becomes 2020.5000 (2020 + 6/12, June)
\item "2020-11" becomes 2020.9167 (2020 + 11/12, November)
}
Formula: \code{decimal_year = year + month / 12}. Note that under this formula
"2020-12" evaluates to 2021.0.
}

\section{Cross-Platform Consistency}{

By default, unicefData returns a codes-only schema that matches the Python
and Stata implementations. Specifically:
\itemize{
\item SDMX requests use codes (\code{labels = "id"}), or client-side filtering removes
human-readable label-expansion columns.
\item Output keeps standardized lowercase context columns (e.g., \code{iso3},
\code{indicator}, \code{period}, \code{value}) plus code columns for dimensions.
\item Indicator-specific dimension code columns are preserved (often lowercase).
\item Duplicate label columns are not included unless
\code{include_label_columns = TRUE} is explicitly set.
}

This ensures column/row counts align across R, Python, and Stata by default.
}

\examples{
\donttest{
# Fetch under-5 mortality for year range
df <- unicefData(
  indicator = "CME_MRY0T4",
  countries = c("ALB", "USA", "BRA"),
  year = "2015:2023"
)

# Single year
df <- unicefData(
  indicator = "CME_MRY0T4",
  countries = c("ALB", "USA"),
  year = 2020
)

# Non-contiguous years
df <- unicefData(
  indicator = "CME_MRY0T4",
  year = "2015,2018,2020"
)

# Circa mode - find closest available year
df <- unicefData(
  indicator = "CME_MRY0T4",
  year = 2015,
  circa = TRUE  # Returns closest to 2015 for each country
)

# Get latest value per country (cross-sectional)
df <- unicefData(
  indicator = "CME_MRY0T4",
  latest = TRUE
)

# Wide format with region metadata
df <- unicefData(
  indicator = "CME_MRY0T4",
  format = "wide",
  add_metadata = c("region", "income_group")
)

# Multiple indicators merged automatically
df <- unicefData(
  indicator = c("CME_MRY0T4", "NT_ANT_HAZ_NE2_MOD"),
  format = "wide_indicators",
  latest = TRUE
)
}
}
