% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pulse_main.R
\name{PULSE}
\alias{PULSE}
\title{Process PULSE data from a single experiment  (\verb{STEPS 1-6})}
\usage{
PULSE(
  paths,
  window_width_secs = 30,
  window_shift_secs = 60,
  min_data_points = 0.8,
  interpolation_freq = 40,
  bandwidth = 0.2,
  doublecheck = TRUE,
  lim_n = 3,
  lim_sd = 0.75,
  raw_v_smoothed = TRUE,
  correct = TRUE,
  discard_channels = NULL,
  keep_raw_data = TRUE,
  subset = 0,
  subset_seed = NULL,
  subset_reindex = FALSE,
  process_large = FALSE,
  show_progress = TRUE,
  max_dataset_size = 20
)
}
\arguments{
\item{paths}{character vectors, containing file paths to CSV files produced by a PULSE system during a single experiment.}

\item{window_width_secs}{numeric, in seconds, defaults to \code{30}; the width of the time windows over which heart rate frequency will be computed.}

\item{window_shift_secs}{numeric, in seconds, defaults to \code{60}; by how much each subsequent window is shifted from the preceding one.}

\item{min_data_points}{numeric, defaults to \code{0.8}; decimal from 0 to 1, used as a threshold to discard incomplete windows where data is missing (e.g., if the sampling frequency is \code{20} and \code{window_width_secs = 30}, each window should include \code{600} data points, and so if \code{min_data_points = 0.8}, windows with less than \code{600 * 0.8 = 480} data points will be rejected).}

\item{interpolation_freq}{numeric, defaults to \code{40}; value expressing the frequency (in Hz) to which PULSE data should be interpolated. Can be set to \code{0} (zero) or any value equal to or greater than \code{40} (the default). If set to zero, no interpolation is performed.}

\item{bandwidth}{numeric, defaults to \code{0.2}; the bandwidth for the Kernel Regression Smoother. If equal to \code{0} (zero) no smoothing is applied. Normally kept low (\code{0.1 - 0.3}) so that only very high frequency noise is removed, but can be pushed up all the way to \code{1} or above (especially when the heartbeat rate is expected to be slow, as is typical of oysters, but double check the resulting data). Type \code{?ksmooth} for additional info.}

\item{doublecheck}{logical, defaults to \code{TRUE}; should \code{\link[=pulse_doublecheck]{pulse_doublecheck()}} be used? (it is rare, but there are instances when it should be disabled).}

\item{lim_n}{numeric, defaults to \code{3}; minimum number of peaks detected in each time window for it to be considered a "keep".}

\item{lim_sd}{numeric, defaults to \code{0.75}; maximum value for the sd of the time intervals between each peak detected for it to be considered a "keep"}

\item{raw_v_smoothed}{logical, defaults to \code{TRUE}; indicates whether or not to also compute heart rates before applying smoothing; this will increase the quality of the output but also double the processing time.}

\item{correct}{logical, defaults to \code{TRUE}; if \code{FALSE}, data points with \code{hz} values likely double the real value are flagged \strong{BUT NOT CORRECTED}. If \code{TRUE}, \code{hz} (as well as \code{data}, \code{n}, \code{sd} and \code{ci}) are corrected accordingly. Note that the correction is not reversible!}

\item{discard_channels}{character vectors, containing the names of channels to be discarded from the analysis. \code{discard_channels} is forced to lowercase, but other than that, the \strong{exact} names must be provided. Discarding unused channels can greatly speed the workflow!}

\item{keep_raw_data}{logical, defaults to \code{TRUE}; If set to \code{FALSE}, \verb{$data} is set to \code{FALSE} (i.e., raw data is discarded), dramatically reducing the amount of disk space required to store the final output (usually, by two orders of magnitude). HOWEVER, note that it won't be possible to use \code{pulse_plot_raw()} anymore!}

\item{subset}{numerical, defaults to \code{0}; the number of time windows to keep from the entire dataset (or the number of entries to reject if set to a negative value); smaller subsets make the entire processing quicker and facilitate the execution of trial runs to optimize parameter selection before processing the entire dataset.}

\item{subset_seed}{numerical, defaults to \code{NULL}; only used if \code{subset} is different from \code{0}; \code{subset_seed} controls the seed used when extracting a subset of the available data; if set to \code{NULL}, a random seed is selected, resulting in rows being selected randomly; alternatively, the user can set a specific seed in order to always select the same rows (important when the goal is to compare the impact of different parameter combinations using the exact same data points).}

\item{subset_reindex}{logical, defaults to \code{FALSE}; only used if \code{subset} is different from \code{0}; after extracting a subset of the available data, should rows be re-indexed (i.e., \code{.$i} made fully sequential); re-indexed rows make using \code{pulse_plot_raw()} easier, but row identity doesn't match anymore with row identity before subsetting.}

\item{process_large}{logical, defaults to \code{FALSE}; If set to \code{FALSE}, if the dataset used as input is large (i.e., combined file size greater than 20 MB, which is equivalent to three files each with a full hour of PULSE data), \code{PULSE} will not process the data and instead suggest the use of \code{\link[=PULSE_by_chunks]{PULSE_by_chunks()}}, which is designed to handle large datasets; If set to \code{TRUE}, \code{PULSE} will proceed with the attempt to process the dataset, but the system's memory may become overloaded and R may never finish the job.}

\item{show_progress}{logical, defaults to \code{TRUE}. If set to \code{TRUE}, progress messages will be provided.}

\item{max_dataset_size}{numeric, defaults to \code{20}. Corresponds to the maximum combined size (in MB) that the dataset contained in the files in \code{paths} can have when \code{process_large} is set to \code{FALSE}. If that size is exceeded, data processing will be aborted with a message explaining the possible remedies. This is a fail-safe to prevent \code{PULSE} from being asked to process a dataset that is larger than the user's machine can handle, a situation that typically leads to a stall (R doesn't fail, it just keeps trying without any progress being made). A conservative value of \code{20} will allow only roughly 3 hours' worth of data to be processed (a PULSE csv file with 1 hour of data typically takes up to 7 MB). If the machine has a large amount of RAM available, a higher value can be used. Alternatively, consider using the function \code{\link[=PULSE_by_chunks]{PULSE_by_chunks()}} instead.}
}
\value{
A tibble with nrows = (number of channels) * (number of windows in \code{pulse_data_split}) and the following columns:
\itemize{
\item \code{i}, the order of each time window
\item \code{smoothed}, logical flagging smoothed data
\item \code{id}, PULSE channel IDs
\item \code{time}, time at the center of each time window
\item \code{data}, a list of tibbles with raw PULSE data for each combination of channel and window, with columns \code{time}, \code{val} and \code{peak} (\code{TRUE} in rows corresponding to wave peaks)
\item \code{hz}, heartbeat rate estimate (in Hz)
\item \code{n}, number of wave peaks identified
\item \code{sd}, standard deviation of the intervals between wave peaks
\item \code{ci}, confidence interval (hz ± ci)
\item \code{keep}, logical indicating whether data points meet N and SD criteria
\item \code{d_r}, ratio of consecutive asymmetric peaks
\item \code{d_f}, logical flagging data points where heart beat frequency is likely double the real value
}
}
\description{
\strong{ALL STEPS EXECUTED SEQUENTIALLY}
\itemize{
\item \verb{step 1} -- \code{\link[=pulse_read]{pulse_read()}}
\item \verb{step 2} -- \code{\link[=pulse_split]{pulse_split()}}
\item \verb{step 3} -- \code{\link[=pulse_optimize]{pulse_optimize()}}
\item \verb{step 4} -- \code{\link[=pulse_heart]{pulse_heart()}}
\item \verb{step 5} -- \code{\link[=pulse_doublecheck]{pulse_doublecheck()}}
\item \verb{step 6} -- \code{\link[=pulse_choose_keep]{pulse_choose_keep()}}
\item \verb{extra step} -- \code{\link[=pulse_normalize]{pulse_normalize()}}
\item \verb{extra step} -- \code{\link[=pulse_summarise]{pulse_summarise()}}
\item \code{visualization} -- \code{\link[=pulse_plot]{pulse_plot()}} and \code{\link[=pulse_plot_raw]{pulse_plot_raw()}}
}

This is a wrapper function that provides a shortcut to running all 6 steps of the PULSE multi-channel data processing pipeline in sequence, namely \code{pulse_read()} >> \code{pulse_split()} >> \code{pulse_optimize()} >> \code{pulse_heart()} >> \code{pulse_doublecheck()} >> \code{pulse_choose_keep()}.

Please note that the \code{heartbeatr} package is designed specifically for PULSE systems commercialized by the non-profit co-op ElectricBlue (\url{https://electricblue.eu/pulse}) and is likely to fail if data from any other system is used as input without matching file formatting.

\code{PULSE()} takes a vector of \code{paths} to PULSE csv files produced by a PULSE system during \strong{a single experiment} (either multi-channel or one-channel, but never both at the same time) and automatically computes the heartbeat frequencies in all target channels across user-defined time windows. The entire workflow may take less than 5 minutes to run on a small dataset (a few hours of data) if \code{params} are chosen with speed in mind and the code is run on a modern machine. Conversely, large datasets (spanning several days) may take hours or even days to run. In extreme situations, datasets may be too large for the machine to handle (due to memory limitations), and it may be better to process batches at a time (check \code{\link[=PULSE_by_chunks]{PULSE_by_chunks()}} and consider implementing a parallel computing strategy).
}
\section{One experiment}{

The \code{heartbeatr} workflow must be applied to a single experiment each time. By \emph{experiment} we mean a collection of PULSE data where all the relevant parameters are invariant, including (but not limited to):
\itemize{
\item the version of the firmware installed in the PULSE device (multi-channel or one-channel)
\item the names of all channels (including unused channels)
\item the frequency at which data was captured
}

Note also that even if two PULSE systems have been used in the same \emph{scientific experiment}, data from each device must be processed independently, and only merged at the end. There's no drawback in doing so, it just is important to understand that that's how data must be processed by the \code{\link{heartbeatr-package}}.
}

\section{Normalizing and summarising data}{

Both \code{\link[=pulse_normalize]{pulse_normalize()}} and \code{\link[=pulse_summarise]{pulse_summarise()}} aren't included in \code{\link[=PULSE]{PULSE()}} because they aren't essential for the PULSE data processing pipeline and the choosing of values for their parameters requires an initial look at the data. However, it is very often crucial to normalize the heart rate estimates produced so that comparisons across individuals can more reliably be made, and it is also often important to manage the amount of data points produced before running statistical analyses on the data to avoid oversampling, meaning that users should consider running the output from \code{\link[=PULSE]{PULSE()}} through both of these functions before considering the data as fully processed and ready for subsequent analysis. Check both functions for additional details on their role in the entire processing pipeline (\code{?pulse_normalize} and \code{?pulse_summarise}).
}

\section{Additional details}{

Check the help files of the underlying functions to obtain additional details about each of the steps implemented under \code{PULSE()}, namely:
\itemize{
\item \code{\link[=pulse_read]{pulse_read()}} describes constraints to the type of files that can be read with the \code{\link{heartbeatr-package}} and explains how time zones are handled.
\item \code{\link[=pulse_split]{pulse_split()}} provides important advice on how to set \code{window_width_secs} and \code{window_shift_secs}, what to expect when lower/higher values are used, and explains how to easily run the \code{\link{heartbeatr-package}} with parallel computing.
\item \code{\link[=pulse_optimize]{pulse_optimize()}} explains in detail how the optimization process (interpolation + smoothing) behaves and how it impacts the performance of the analysis.
\item \code{\link[=pulse_heart]{pulse_heart()}} outlines the algorithm used to identify peaks in the heart beat wave data and some of its limitations.
\item \code{\link[=pulse_doublecheck]{pulse_doublecheck()}} explains the method used to detect situations when the algorithm's processing resulted in a heart beat frequency double the real value.
\item \code{\link[=pulse_choose_keep]{pulse_choose_keep()}} selects the best estimates when \code{raw_v_smoothed = TRUE} and classifies data points as \code{keep} or \code{reject}.
}
}

\section{Also check}{

\itemize{
\item \code{\link[=pulse_normalize]{pulse_normalize()}} for important info about individual variations on baseline heart rate.
\item \code{\link[=pulse_summarise]{pulse_summarise()}} for important info about oversampling and strategies to handle that.
\item \code{\link[=PULSE_by_chunks]{PULSE_by_chunks()}} for processing large datasets.
}
}

\section{BPM}{

To convert to Beats Per Minute (bpm), simply multiply \code{hz} and \code{ci} by 60.
}

\examples{
## Begin prepare data ----
paths <- pulse_example()
chn <- paste0("c", formatC(1:10, width = 2, flag = "0"))
## End prepare data ----

# Run the whole PULSE data processing pipeline with a single call;
# every channel named in `chn` except "c08" is discarded
PULSE(
  paths,
  discard_channels = chn[-8],
  raw_v_smoothed   = FALSE,
  show_progress    = FALSE
)

# Equivalent to running each step of the pipeline in turn...
pulse_data <- pulse_read(paths)
multi <- pulse_data$multi
# keep only the time stamps and channel "c08"
pulse_data$data <- pulse_data$data[, c("time", "c08")]
pulse_data <- pulse_split(pulse_data)
pulse_data <- pulse_optimize(pulse_data, raw_v_smoothed = FALSE, multi = multi)
pulse_data <- pulse_heart(pulse_data)
pulse_data <- pulse_doublecheck(pulse_data)
pulse_data <- pulse_choose_keep(pulse_data)
pulse_data

}
\seealso{
\itemize{
\item \code{\link[=approx]{approx()}} is used by \code{\link[=pulse_interpolate]{pulse_interpolate()}} for the linear interpolation of PULSE data
\item \code{\link[=ksmooth]{ksmooth()}} is used by \code{\link[=pulse_smooth]{pulse_smooth()}} for the kernel smoothing of PULSE data
\item \code{\link[=pulse_read]{pulse_read()}}, \code{\link[=pulse_split]{pulse_split()}}, \code{\link[=pulse_optimize]{pulse_optimize()}}, \code{\link[=pulse_heart]{pulse_heart()}}, \code{\link[=pulse_doublecheck]{pulse_doublecheck()}} and \code{\link[=pulse_choose_keep]{pulse_choose_keep()}} are the functions used in the complete \code{heartbeatr} processing workflow
\item \code{\link[=pulse_normalize]{pulse_normalize()}} and \code{\link[=pulse_summarise]{pulse_summarise()}} are important post-processing functions
\item \code{\link[=pulse_plot]{pulse_plot()}} and \code{\link[=pulse_plot_raw]{pulse_plot_raw()}} can be used to inspect the processed data
}
}
