diff --git a/docs/source/developers/cpp/development.rst b/docs/source/developers/cpp/development.rst index 57bced49337..a6c09b47cad 100644 --- a/docs/source/developers/cpp/development.rst +++ b/docs/source/developers/cpp/development.rst @@ -74,6 +74,24 @@ corresponding executable from the command line, e.g.:: Code Style, Linting, and CI =========================== +Development Tool Requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following tools and versions are required for C++ development: + +* **clang-format** version 14.0.6 or later for code formatting +* **pre-commit** version 2.17.0 or later (Ubuntu 22.04 ships with this version) +* **Ubuntu** 22.04 LTS or later for Linux development (Ubuntu 22.04 is supported until June 2027) + +These tools are used in our continuous integration pipeline to ensure code quality +and consistency. To set up pre-commit hooks locally, install the ``pre-commit`` +Python package and run:: + + $ pip install pre-commit + $ pre-commit install + +This will automatically run formatting and linting checks before each commit. + This project follows `Google's C++ Style Guide `_ with these exceptions: diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp index a8334387c52..8606035eb1a 100644 --- a/r/src/type_infer.cpp +++ b/r/src/type_infer.cpp @@ -69,7 +69,7 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { return date32(); } else if (Rf_inherits(x, "POSIXct")) { auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); - if (Rf_isNull(tzone_sexp)) { + if (Rf_isNull(tzone_sexp) || XLENGTH(tzone_sexp) == 0) { auto systzone_sexp = cpp11::package("base")["Sys.timezone"]; return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0))); } else { @@ -86,7 +86,7 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { } if (Rf_inherits(x, "POSIXct")) { auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); - if (Rf_isNull(tzone_sexp)) { + if (Rf_isNull(tzone_sexp) || XLENGTH(tzone_sexp) == 0) { auto systzone_sexp = cpp11::package("base")["Sys.timezone"]; return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0))); } else { diff --git a/r/tests/testthat/test-issue-48832.R b/r/tests/testthat/test-issue-48832.R new file mode 100644 index 00000000000..40f98c2e5ff --- /dev/null +++ b/r/tests/testthat/test-issue-48832.R @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test_that("zero-length POSIXct with empty tzone attribute handled safely", { + x <- as.POSIXct(character(0)) + attr(x, "tzone") <- character(0) + + # Should not crash or error + expect_error(type(x), NA) + + # Should default to no timezone (or empty string which effectively means local/no-tz behavior in arrow) + # When sys.timezone is picked up it might vary, but we just check it doesn't crash. + # If it picks up Sys.timezone(), checking exact equality might be flaky across environments if not mocked. + # So we primarily check for no error. + + # Also check write_parquet survival + tf <- tempfile() + on.exit(unlink(tf)) + expect_error(write_parquet(data.frame(x = x), tf), NA) + expect_true(file.exists(tf)) +})