% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/HaploDip_Neis_H.R
\name{compute_Hs_W}
\alias{compute_Hs_W}
\title{Compute per-window Nei's H (gene diversity)}
\usage{
compute_Hs_W(
  geno.data,
  pop.file,
  contigs,
  positions,
  window.size,
  verbose = TRUE
)
}
\arguments{
\item{geno.data}{A character matrix of genotype strings with dimensions
\code{n_sites x n_individuals}, as returned by \code{\link[=vcf2GT]{vcf2GT()}}.}

\item{pop.file}{A \code{data.frame} or \code{data.table} with at least two
columns: \code{ID} (individual identifiers matching the column names of
\code{geno.data}) and \code{Pop} (population labels).}

\item{contigs}{A character vector of length \code{n_sites} containing the
contig (chromosome) name for each variant site, as returned by \code{\link[=vcf2GT]{vcf2GT()}}.}

\item{positions}{A numeric vector of length \code{n_sites} containing the
physical position (bp) of each variant site, as returned by \code{\link[=vcf2GT]{vcf2GT()}}.}

\item{window.size}{A single positive integer giving the size of each
sliding window in base pairs.}

\item{verbose}{Logical. If \code{TRUE} (default), print progress messages.}
}
\value{
A \code{\link[data.table:data.table]{data.table}} with one row per population-contig-window
  combination and the following columns:
  \describe{
    \item{Pop}{Population label.}
    \item{Contig}{Contig (chromosome) name.}
    \item{Window_starts}{Genomic coordinate (bp) of the first position in
      the window.}
    \item{Window_ends}{Genomic coordinate (bp) of the last position in the
      window (\code{Window_starts + window.size - 1}).}
    \item{N_sites}{Total number of called genotype entries (diploid +
      haploid) within the window.}
    \item{Neis_H}{Nei's H (gene diversity) for the window, computed as
      \code{2 * Freq.Ref * Freq.Alt}.}
  }
}
\description{
Iterates over each population defined in \code{pop.file}, splits the
genotype data by contig, and slides a fixed-size window along each contig
to compute Nei's H (probability of sampling two different alleles) within
that window. Nei's H is calculated as \code{2pq}, where \code{p} and
\code{q} are the reference and alternative allele frequencies respectively.
Both diploid genotypes (\code{"0/0"}, \code{"0/1"}, \code{"1/1"}) and
haploid genotypes (\code{"0"}, \code{"1"}) are recognised when computing
allele frequencies. Although the sexes differ in ploidy, allele frequencies
are expected to be the same in both, so Nei's H is agnostic to ploidy.
}
\examples{
vcf_path <- system.file("extdata",
                        "example.vcf",
                        package = "HaploDiploidEquilibrium")

result <- vcf2GT(vcf_path)
gt       <- result$gt_matrix
contigs  <- result$contig_vector
pos      <- result$positions

pop.file <- data.frame(ID  = colnames(gt),
                       Pop = c("PopA","PopA","PopB","PopB","PopB"))

hs <- compute_Hs_W(geno.data   = gt,
                   pop.file    = pop.file,
                   contigs     = contigs,
                   positions   = pos,
                   window.size = 10000)

}
\seealso{
\code{\link[=summarize_NeisH]{summarize_NeisH()}} for computing weighted genome-wide summary
  statistics from the output.
}
