Overview
TileDB supports a wide range of date and time types, as shown in the documentation for Datetimes.
The supported range of dates and times corresponds to the range supported by the Python package numpy, and is described in its documentation.
import numpy as np
import datetime
day = "1971-02-03 04:05:06" # third day of second month of first year past epoch
[np.datetime64(day, 'Y').astype('int64'), # extract year, month, ... as an int64
np.datetime64(day, 'M').astype('int64'),
np.datetime64(day, 'D').astype('int64'),
np.datetime64(day, 'h').astype('int64'),
np.datetime64(day, 'm').astype('int64'),
np.datetime64(day, 's').astype('int64')]
## [1, 13, 398, 9556, 573365, 34401906]

R has date and datetime support built in. The Date class represents dates as an integer count of days since the epoch, and matches the D value from the previous example. Similarly, the ‘compact’ POSIXct representation of a datetime uses the number of seconds since the epoch and corresponds to the s value from the previous example.
as.Date(398, origin="1970-01-01") # conversion from numeric input requires 'origin'
## [1] "1971-02-03"
as.POSIXct(34401906, origin="1970-01-01", tz="UTC", usetz=TRUE)
## [1] "1971-02-03 04:05:06 UTC"

R can reconstruct dates and times from the numpy representation using the epoch as a ‘base’ date along with time period calculations. This can be done using base R (adding to Date or POSIXct objects), by using the lubridate package for a number of intermediate formats, and by using the nanotime package for higher-resolution periods and intervals.
suppressMessages(library(lubridate))
ymd("1970-01-01") + c(years(1), months(13), days(398))
## [1] "1971-01-01" "1971-02-01" "1971-02-03"
ymd_hms("1970-01-01 00:00:00") + c(hours(9556), minutes(573365))
## [1] "1971-02-03 04:00:00 UTC" "1971-02-03 04:05:00 UTC"
suppressMessages(library(nanotime))
nanotime("1970-01-01T00:00:00+00:00") + nanoduration(hours=2, minutes=3,
seconds=4, nanoseconds=5)
## [1] 1970-01-01T02:03:04.000000005+00:00

Python and R Interoperability
Coarsest: Year
Python
import numpy as np
import sys
import os
import tiledb
uri = "/tmp/tiledb/dt_year"
dom = tiledb.Domain(tiledb.Dim(name="rows",
domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')),
tile=np.timedelta64(10, 'Y'),
dtype=np.datetime64('', 'Y')))
schema = tiledb.ArraySchema(domain=dom,
sparse=True,
attrs=[tiledb.Attr(name="a", dtype=np.int32)])
if (os.path.isdir(uri)):
tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)
with tiledb.SparseArray(uri, mode='w') as A:
I = [np.datetime64('2001-01-01'),np.datetime64('2002-01-01'),np.datetime64('2003-01-01')]
data = np.array(([1,2,3]))
A[I] = data

R
library(tiledb)
uri <- "/tmp/tiledb/dt_year"
arr <- tiledb_array(uri, return_as="data.frame")
arr[]
## rows a
## 1 2001-01-01 1
## 2 2002-01-01 2
## 3 2003-01-01 3
## we can also look at 'raw' int64 values:
datetimes_as_int64(arr) <- TRUE
arr[]
## rows a
## 1 31 1
## 2 32 2
## 3 33 3

Day
Python
import numpy as np
import sys
import os
import tiledb
uri = "/tmp/tiledb/dt_day"
dom = tiledb.Domain(tiledb.Dim(name="rows",
domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')),
tile=np.timedelta64(10, 'D'),
dtype=np.datetime64('', 'D')))
schema = tiledb.ArraySchema(domain=dom,
sparse=True,
attrs=[tiledb.Attr(name="a", dtype=np.int32)])
if (os.path.isdir(uri)):
tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)
with tiledb.SparseArray(uri, mode='w') as A:
I = [np.datetime64('2001-01-01'),np.datetime64('2001-01-02'),np.datetime64('2001-01-03')]
data = np.array(([1,2,3]))
A[I] = data

R
library(tiledb)
uri <- "/tmp/tiledb/dt_day"
arr <- tiledb_array(uri, return_as="data.frame")
arr[]
## rows a
## 1 2001-01-01 1
## 2 2001-01-02 2
## 3 2001-01-03 3

Minute
Python
import numpy as np
import sys
import os
import tiledb
uri = "/tmp/tiledb/dt_min"
dom = tiledb.Domain(tiledb.Dim(name="rows",
domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')),
tile=np.timedelta64(10, 'm'),
dtype=np.datetime64('', 'm')))
schema = tiledb.ArraySchema(domain=dom,
sparse=True,
attrs=[tiledb.Attr(name="a", dtype=np.int32)])
if (os.path.isdir(uri)):
tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)
with tiledb.SparseArray(uri, mode='w') as A:
I = [np.datetime64('2001-01-01 00:00'),
np.datetime64('2001-01-02 00:01'),
np.datetime64('2001-01-03 00:02')]
data = np.array(([1,2,3]))
A[I] = data

R
library(tiledb)
uri <- "/tmp/tiledb/dt_min"
arr <- tiledb_array(uri, return_as="data.frame")
arr[]
## rows a
## 1 2001-01-01 00:00:00 1
## 2 2001-01-02 00:01:00 2
## 3 2001-01-03 00:02:00 3

Millisecond
Python
import numpy as np
import sys
import os
import tiledb
uri = "/tmp/tiledb/dt_ms"
dom = tiledb.Domain(tiledb.Dim(name="rows",
domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')),
tile=np.timedelta64(10, 'ms'),
dtype=np.datetime64('', 'ms')))
schema = tiledb.ArraySchema(domain=dom,
sparse=True,
attrs=[tiledb.Attr(name="a", dtype=np.int32)])
if (os.path.isdir(uri)):
tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)
with tiledb.SparseArray(uri, mode='w') as A:
I = [np.datetime64('1970-01-01 00:00:00.001'),
np.datetime64('1980-01-01 00:00:00.002'),
np.datetime64('1990-01-01 00:00:00.003'),
np.datetime64('2000-01-01 00:00:00.004'),
np.datetime64('2010-01-01 00:00:00.005'),
np.datetime64('2020-01-01 00:00:00.006')]
data = np.array(([1,2,3,4,5,6]))
A[I] = data

R
library(tiledb)
uri <- "/tmp/tiledb/dt_ms"
arr <- tiledb_array(uri, return_as="data.frame")
arr[]
## rows a
## 1 1970-01-01 00:00:00.001 1
## 2 1980-01-01 00:00:00.002 2
## 3 1990-01-01 00:00:00.003 3
## 4 2000-01-01 00:00:00.004 4
## 5 2010-01-01 00:00:00.005 5
## 6 2020-01-01 00:00:00.006 6

Microsecond
Python
import numpy as np
import sys
import os
import tiledb
uri = "/tmp/tiledb/dt_us"
dom = tiledb.Domain(tiledb.Dim(name="rows",
domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')),
tile=np.timedelta64(10, 'us'),
dtype=np.datetime64('', 'us')))
schema = tiledb.ArraySchema(domain=dom,
sparse=True,
attrs=[tiledb.Attr(name="a", dtype=np.int32)])
if (os.path.isdir(uri)):
tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)
with tiledb.SparseArray(uri, mode='w') as A:
I = [np.datetime64('1970-01-01 00:00:00.000001'),
np.datetime64('1980-01-01 00:00:00.000002'),
np.datetime64('1990-01-01 00:00:00.000003'),
np.datetime64('2000-01-01 00:00:00.000004'),
np.datetime64('2010-01-01 00:00:00.000005'),
np.datetime64('2020-01-01 00:00:00.000006')]
data = np.array(([1,2,3,4,5,6]))
A[I] = data

R
library(tiledb)
uri <- "/tmp/tiledb/dt_us"
arr <- tiledb_array(uri, return_as="data.frame")
arr[]
## rows a
## 1 1970-01-01 00:00:00.000001 1
## 2 1980-01-01 00:00:00.000001 2
## 3 1990-01-01 00:00:00.000002 3
## 4 2000-01-01 00:00:00.000003 4
## 5 2010-01-01 00:00:00.000005 5
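## 6 2020-01-01 00:00:00.000005 6

Note that some of the fractional seconds displayed above differ from the values written (for example .000001 rather than .000002). This appears to be a display artifact: R holds POSIXct values as double-precision seconds, which cannot represent every microsecond exactly at this magnitude, and fractional seconds are truncated rather than rounded when printed.

Nanosecond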
Python
import numpy as np
import sys
import os
import tiledb
uri = "/tmp/tiledb/dt_ns"
dom = tiledb.Domain(tiledb.Dim(name="rows",
domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')),
tile=np.timedelta64(10, 'ns'),
dtype=np.datetime64('', 'ns')))
schema = tiledb.ArraySchema(domain=dom,
sparse=True,
attrs=[tiledb.Attr(name="a", dtype=np.int32)])
if (os.path.isdir(uri)):
tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)
with tiledb.SparseArray(uri, mode='w') as A:
I = [np.datetime64('1970-01-01 00:00:00.000000001'),
np.datetime64('1980-01-01 00:00:00.000000002'),
np.datetime64('1990-01-01 00:00:00.000000003'),
np.datetime64('2000-01-01 00:00:00.000000004'),
np.datetime64('2010-01-01 00:00:00.000000005'),
np.datetime64('2020-01-01 00:00:00.000000006')]
data = np.array(([1,2,3,4,5,6]))
A[I] = data

R
library(tiledb)
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, return_as="data.frame")
arr[]
## rows a
## 1 1970-01-01T00:00:00.000000001+00:00 1
## 2 1980-01-01T00:00:00.000000002+00:00 2
## 3 1990-01-01T00:00:00.000000003+00:00 3
## 4 2000-01-01T00:00:00.000000004+00:00 4
## 5 2010-01-01T00:00:00.000000005+00:00 5
## 6 2020-01-01T00:00:00.000000006+00:00 6

Use integer64 Directly
Sometimes we may want to access date or datetime values in their native integer64 format. To do so, we set a toggle when opening the array, as shown in the following example, which uses the array from the preceding example (at nanosecond resolution).
library(tiledb)
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, return_as="data.frame", datetimes_as_int64=TRUE)
arr[]
## rows a
## 1 1 1
## 2 315532800000000002 2
## 3 631152000000000003 3
## 4 946684800000000004 4
## 5 1262304000000000005 5
## 6 1577836800000000006 6

We can also write integer64 values. The following example adds two extra rows:
library(tiledb)
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, return_as="data.frame", datetimes_as_int64=TRUE)
arr[] <- data.frame( rows=bit64::as.integer64(2:3), a=102:103)
arr[]
## rows a
## 1 1 1
## 2 2 102
## 3 3 103
## 4 315532800000000002 2
## 5 631152000000000003 3
## 6 946684800000000004 4
## 7 1262304000000000005 5
## 8 1577836800000000006 6