Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/source/developers/cpp/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,24 @@ corresponding executable from the command line, e.g.::
Code Style, Linting, and CI
===========================

Development Tool Requirements
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The following tools and versions are required for C++ development:

* **clang-format** version 14.0.6 or later for code formatting
* **pre-commit** version 2.17.0 or later (Ubuntu 22.04 ships with this version)
* **Ubuntu** 22.04 LTS or later for Linux development (Ubuntu 22.04 is supported until June 2027)

These tools are used in our continuous integration pipeline to ensure code quality
and consistency. To set up pre-commit hooks locally, install the ``pre-commit``
Python package and run::

$ pip install pre-commit
$ pre-commit install

This will automatically run formatting and linting checks before each commit.

This project follows `Google's C++ Style Guide
<https://google.github.io/styleguide/cppguide.html>`_ with these exceptions:

Expand Down
4 changes: 2 additions & 2 deletions r/src/type_infer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ std::shared_ptr<arrow::DataType> InferArrowTypeFromVector<INTSXP>(SEXP x) {
return date32();
} else if (Rf_inherits(x, "POSIXct")) {
auto tzone_sexp = Rf_getAttrib(x, symbols::tzone);
if (Rf_isNull(tzone_sexp)) {
if (Rf_isNull(tzone_sexp) || XLENGTH(tzone_sexp) == 0) {
auto systzone_sexp = cpp11::package("base")["Sys.timezone"];
return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0)));
} else {
Expand All @@ -86,7 +86,7 @@ std::shared_ptr<arrow::DataType> InferArrowTypeFromVector<REALSXP>(SEXP x) {
}
if (Rf_inherits(x, "POSIXct")) {
auto tzone_sexp = Rf_getAttrib(x, symbols::tzone);
if (Rf_isNull(tzone_sexp)) {
if (Rf_isNull(tzone_sexp) || XLENGTH(tzone_sexp) == 0) {
auto systzone_sexp = cpp11::package("base")["Sys.timezone"];
return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0)));
} else {
Expand Down
35 changes: 35 additions & 0 deletions r/tests/testthat/test-issue-48832.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

test_that("zero-length POSIXct with empty tzone attribute handled safely", {
x <- as.POSIXct(character(0))
attr(x, "tzone") <- character(0)

# Should not crash or error
expect_error(type(x), NA)

# Should default to no timezone (or empty string which effectively means local/no-tz behavior in arrow)
# When sys.timezone is picked up it might vary, but we just check it doesn't crash.
# If it picks up Sys.timezone(), checking exact equality might be flaky across environments if not mocked.
# So we primarily check for no error.

# Also check write_parquet survival
tf <- tempfile()
on.exit(unlink(tf))
expect_error(write_parquet(data.frame(x = x), tf), NA)
expect_true(file.exists(tf))
})
Comment on lines +18 to +35
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind putting this test in somewhere around

test_that("array supports POSIXct (ARROW-3340)", {
times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10
expect_array_roundtrip(times, timestamp("us", "UTC"))
times[5] <- NA
expect_array_roundtrip(times, timestamp("us", "UTC"))
times2 <- lubridate::ymd_hms("2018-10-07 19:04:05", tz = "America/New_York") + 1:10
expect_array_roundtrip(times2, timestamp("us", "America/New_York"))
})
test_that("array uses local timezone for POSIXct without timezone", {
withr::with_envvar(c(TZ = ""), {
times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
expect_equal(attr(times, "tzone"), NULL)
expect_array_roundtrip(times, timestamp("us", Sys.timezone()))
# Also test the INTSXP code path
skip("Ingest_POSIXct only implemented for REALSXP")
times_int <- as.integer(times)
attributes(times_int) <- attributes(times)
expect_array_roundtrip(times_int, timestamp("us", ""))
})
# If there is a timezone set, we record that
withr::with_timezone("Pacific/Marquesas", {
times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
expect_equal(attr(times, "tzone"), "Pacific/Marquesas")
expect_array_roundtrip(times, timestamp("us", "Pacific/Marquesas"))
times_with_tz <- strptime(
"2019-02-03 12:34:56",
format = "%Y-%m-%d %H:%M:%S",
tz = "Asia/Katmandu"
) +
1:10
expect_equal(attr(times, "tzone"), "Asia/Katmandu")
expect_array_roundtrip(times, timestamp("us", "Asia/Katmandu"))
})
# and although the TZ is NULL in R, we set it to the Sys.timezone()
withr::with_timezone(NA, {
times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
expect_equal(attr(times, "tzone"), NULL)
expect_array_roundtrip(times, timestamp("us", Sys.timezone()))
})
})

and possibly for the parquet section, put it somewhere near

test_that("write_parquet() can truncate timestamps", {
tab <- Table$create(x1 = as.POSIXct("2020/06/03 18:00:00", tz = "UTC"))
expect_type_equal(tab$x1, timestamp("us", "UTC"))
tf <- tempfile()
on.exit(unlink(tf))
write_parquet(tab, tf, coerce_timestamps = "ms", allow_truncated_timestamps = TRUE)
new <- read_parquet(tf, as_data_frame = FALSE)
expect_type_equal(new$x1, timestamp("ms", "UTC"))
expect_equal(as.data.frame(tab), as.data.frame(new))
})
test_that("make_valid_parquet_version()", {
expect_equal(
make_valid_parquet_version("1.0"),
ParquetVersionType$PARQUET_1_0
)
expect_equal(
make_valid_parquet_version("2.4"),
ParquetVersionType$PARQUET_2_4
)
expect_equal(
make_valid_parquet_version("2.6"),
ParquetVersionType$PARQUET_2_6
)
expect_equal(
make_valid_parquet_version("latest"),
ParquetVersionType$PARQUET_2_6
)
expect_equal(make_valid_parquet_version(1), ParquetVersionType$PARQUET_1_0)
expect_equal(make_valid_parquet_version(1.0), ParquetVersionType$PARQUET_1_0)
expect_equal(make_valid_parquet_version(2.4), ParquetVersionType$PARQUET_2_4)
})