Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
b4037db
Add GitHub Actions workflow for type checking (mypy, pyright, ty). Ad…
rok Dec 21, 2025
c789d3f
Update ci/scripts/python_test_type_annotations.sh
rok Dec 23, 2025
750bd18
Apply suggestion from @raulcd
rok Jan 14, 2026
09a9a63
review feedback
rok Jan 14, 2026
18a1572
include dev/update_stub_docstrings.py
rok Jan 14, 2026
6851321
Use PYARROW_TEST_ANNOTATIONS in windows build, disable wheel docstrin…
rok Jan 14, 2026
4bfb3cd
work on dev/update_stub_docstrings.py
rok Jan 14, 2026
bee2f9d
further work on dev/update_stub_docstrings.py
rok Jan 14, 2026
44cab7d
lint
rok Jan 14, 2026
0c2fed5
add click for docstring population
rok Jan 14, 2026
ed124bb
Remove dependencies on click and griffe
rok Jan 14, 2026
b7a62e1
fix import paths
rok Jan 14, 2026
b8cbe92
add PYARROW_TEST_ANNOTATIONS to AMD64 Windows 2022 Python 3.13
rok Jan 14, 2026
9cf8f09
move check to pre-commit
rok Jan 23, 2026
f586afe
change pre-commit, add note
rok Jan 23, 2026
26d44bf
change pre-commit script
rok Jan 23, 2026
45196ff
Revert from pre-commit to ci/scripts/python_test_type_annotations.sh
rok Jan 23, 2026
cbbe21b
Apply suggestions from code review
rok Jan 25, 2026
f0db4e4
apply review suggestion
rok Jan 25, 2026
9de24df
fix shellcheck
rok Jan 25, 2026
1bac491
Try single build
rok Jan 26, 2026
12ff649
lint
rok Jan 26, 2026
9d2fcf0
review feedback
rok Jan 31, 2026
2ca0108
reintroduce pythonVersion = 3.10
rok Jan 31, 2026
0a518b1
revert change
rok Jan 31, 2026
c78dafc
post rebase fix
rok Jan 31, 2026
9e00fd7
--no-build-isolation bypasses pyproject.toml's build requirements
rok Jan 31, 2026
afc2f46
libcs added
rok Jan 31, 2026
e96017f
Added DYLD_LIBRARY_PATH export to python_build.sh
rok Jan 31, 2026
7f174b4
Add GitHub Actions workflow for type checking (mypy, pyright, ty). Ad…
rok Dec 21, 2025
0ab7dc7
Use PYARROW_TEST_ANNOTATIONS in windows build, disable wheel docstrin…
rok Jan 14, 2026
93a93ea
move check to pre-commit
rok Jan 23, 2026
4cfc357
Revert from pre-commit to ci/scripts/python_test_type_annotations.sh
rok Jan 23, 2026
78044ff
add-type-stubs-for-internal-type-system
rok Jan 26, 2026
65da759
fsspec comes without annotations
rok Jan 26, 2026
367953c
verbosity setting
rok Jan 26, 2026
7873930
post rebase
rok Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ repos:
?^ci/scripts/python_sdist_build\.sh$|
?^ci/scripts/python_sdist_test\.sh$|
?^ci/scripts/python_wheel_unix_test\.sh$|
?^ci/scripts/python_test_type_annotations\.sh$|
?^ci/scripts/r_build\.sh$|
?^ci/scripts/r_revdepcheck\.sh$|
?^ci/scripts/release_test\.sh$|
Expand Down Expand Up @@ -377,6 +378,7 @@ repos:
# TODO: Remove this when we fix all lint failures
files: >-
(
?^ci/scripts/python_test_type_annotations\.sh$|
?^dev/release/05-binary-upload\.sh$|
?^dev/release/binary-recover\.sh$|
?^dev/release/post-03-binary\.sh$|
Expand Down
1 change: 1 addition & 0 deletions ci/conda_env_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cython>=3.1
cloudpickle
fsspec
hypothesis
libcst>=1.8.6
numpy>=1.16.6
pytest
pytest-faulthandler
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ export PYARROW_PARALLEL=${n_jobs}
: "${CMAKE_PREFIX_PATH:=${ARROW_HOME}}"
export CMAKE_PREFIX_PATH
export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
export DYLD_LIBRARY_PATH=${ARROW_HOME}/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}}

# https://github.com/apache/arrow/issues/41429
# TODO: We want to out-of-source build. This is a workaround. We copy
Expand Down
39 changes: 39 additions & 0 deletions ci/scripts/python_test_type_annotations.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -ex

# Directory the type checkers are run from (compose.yaml passes /arrow/python).
# Abort with a usage message when the argument is missing: an empty path
# would otherwise reach `pushd ""`, which at best fails with a confusing error
# and at worst runs the checkers in whatever the current directory is.
pyarrow_dir="${1:?usage: $0 <pyarrow_dir>}"

# Activate the virtualenv when the caller provides one; otherwise use the
# Python environment that is already active.
if [ -n "${ARROW_PYTHON_VENV:-}" ]; then
  # shellcheck source=/dev/null
  . "${ARROW_PYTHON_VENV}/bin/activate"
fi

# Install library stubs. Note some libraries contain their own type hints so they need to be installed.
pip install fsspec pandas-stubs scipy-stubs types-cffi types-psutil types-requests types-python-dateutil

# Install type checkers
pip install mypy pyright ty

# Run type checkers from within the target directory. Because of `set -e` the
# script stops at the first checker that exits with a non-zero status.
pushd "${pyarrow_dir}"
mypy
pyright --stats
ty check --verbose --output-format concise
popd
2 changes: 1 addition & 1 deletion ci/scripts/python_wheel_validate_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def validate_wheel(path):
for info in f.filelist), \
f"{filename} is missing from the wheel."
print(f"The wheel: {wheels[0]} seems valid.")

# TODO(GH-32609): Validate some docstrings were generated and added.

def main():
parser = argparse.ArgumentParser()
Expand Down
3 changes: 2 additions & 1 deletion compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1539,7 +1539,8 @@ services:
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
/arrow/ci/scripts/python_test.sh /arrow"]
/arrow/ci/scripts/python_test.sh /arrow &&
/arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]

conda-python-dask:
# Possible $DASK parameters:
Expand Down
68 changes: 68 additions & 0 deletions docs/source/developers/python/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,74 @@ The test groups currently include:
* ``s3``: Tests for Amazon S3
* ``tensorflow``: Tests that involve TensorFlow

Type Checking
=============

PyArrow provides type stubs (``*.pyi`` files) for static type checking. These
stubs are located in the ``pyarrow-stubs/`` directory and are automatically
included in the distributed wheel packages.

Running Type Checkers
---------------------

We support multiple type checkers. Their configurations are in
``pyproject.toml``.

**mypy**

To run mypy on the PyArrow codebase:

.. code-block::

$ cd arrow/python
$ mypy

The mypy configuration is in the ``[tool.mypy]`` section of ``pyproject.toml``.

**pyright**

To run pyright:

.. code-block::

$ cd arrow/python
$ pyright

The pyright configuration is in the ``[tool.pyright]`` section of ``pyproject.toml``.

**ty**

To run ty (note: currently only partially configured):

.. code-block::

$ cd arrow/python
$ ty check

Maintaining Type Stubs
-----------------------

Type stubs for PyArrow are maintained in the ``pyarrow-stubs/``
directory. These stubs mirror the structure of the main ``pyarrow/`` package.

When adding or modifying public APIs:

1. **Update the corresponding ``.pyi`` stub file** in ``pyarrow-stubs/``
to reflect the new or changed function/class signatures.

2. **Include type annotations** where possible. For Cython modules or
dynamically generated APIs, such as compute kernels, add the corresponding
stub in ``pyarrow-stubs/``.

3. **Run type checkers** to ensure the stubs are correct and complete.

The stub files are automatically copied into the built wheel during the build
process and are installed together with PyArrow, enabling type checking in
downstream projects and in users' IDEs.

Note: the ``py.typed`` marker file in the ``pyarrow/`` directory tells type
checkers that PyArrow ships type information, as specified by :pep:`561`.

Doctest
=======

Expand Down
1 change: 1 addition & 0 deletions python/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include ../NOTICE.txt

global-include CMakeLists.txt
graft pyarrow
graft pyarrow-stubs
graft cmake_modules

global-exclude *.so
Expand Down
29 changes: 29 additions & 0 deletions python/pyarrow-stubs/pyarrow/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Type stubs for PyArrow.

This is a placeholder stub file.
Complete type annotations will be added in subsequent PRs.
"""

from typing import Any

# Module-level fallback for names that are not declared in this stub: with it
# in place, type checkers type every otherwise-unknown ``pyarrow.<name>``
# access as ``Any`` instead of reporting a missing attribute.
# TODO(GH-48970): remove __getattr__ before release as this
# will annotate non-existing attributes as Any.
# https://github.com/apache/arrow/issues/48970
def __getattr__(name: str) -> Any: ...
133 changes: 133 additions & 0 deletions python/pyarrow-stubs/pyarrow/_stubs_typing.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import datetime as dt

from collections.abc import Collection, Iterator, Sequence
from decimal import Decimal
from typing import Any, Literal, Protocol, TypeAlias, TypeVar

import numpy as np

from numpy.typing import NDArray

from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray

# Placeholder aliases: both are currently unconstrained (``Any``).
ArrayLike: TypeAlias = Any
ScalarLike: TypeAlias = Any
# The aliases below are closed sets of string literals: where one of them is
# used as an annotation, a type checker rejects any other string value.
Order: TypeAlias = Literal["ascending", "descending"]
JoinType: TypeAlias = Literal[
"left semi",
"right semi",
"left anti",
"right anti",
"inner",
"left outer",
"right outer",
"full outer",
]
Compression: TypeAlias = Literal[
"gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy"
]
NullEncoding: TypeAlias = Literal["mask", "encode"]
NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"]
TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"]
# Accepted forms of a mask: a sequence of optional bools, a NumPy boolean
# array, a BooleanArray, or a ChunkedArray whose type parameter is left
# unconstrained (``Any``), i.e. not limited to boolean chunks.
Mask: TypeAlias = (
Sequence[bool | None]
| NDArray[np.bool_]
| BooleanArray
| ChunkedArray[Any]
)
# Accepted forms of indices: a sequence of optional ints, a NumPy array of any
# integer dtype, an IntegerArray, or a ChunkedArray whose type parameter is
# left unconstrained (``Any``).
Indices: TypeAlias = (
Sequence[int | None]
| NDArray[np.integer[Any]]
| IntegerArray
| ChunkedArray[Any]
)
Comment on lines +49 to +60
Copy link

@dangotbanned dangotbanned Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't the most exciting suggestion, but it's something that constantly frustrates me 😅

Suggested change
Mask: TypeAlias = (
Sequence[bool | None]
| NDArray[np.bool_]
| BooleanArray
| ChunkedArray[Any]
)
Indices: TypeAlias = (
Sequence[int | None]
| NDArray[np.integer[Any]]
| IntegerArray
| ChunkedArray[Any]
)
from pyarrow import lib
IntegerType: TypeAlias = (
lib.Int8Type
| lib.Int16Type
| lib.Int32Type
| lib.Int64Type
| lib.UInt8Type
| lib.UInt16Type
| lib.UInt32Type
| lib.UInt64Type
)
Mask: TypeAlias = (
Sequence[bool | None]
| NDArray[np.bool_]
| lib.Array[lib.Scalar[lib.BoolType]]
| ChunkedArray[Any]
)
Indices: TypeAlias = (
Sequence[int | None]
| NDArray[np.integer[Any]]
| lib.Array[lib.Scalar[IntegerType]]
| ChunkedArray[Any]
)

An alternative would just be Array[Any].
Using the concrete subclasses requires the stubs to do a carefully choreographed dance, or the user to typing.cast everywhere - sadly


# Union of plain Python value types (numbers, text, bytes, decimal and
# date/time values).
# NOTE(review): presumably the Python values accepted as scalar inputs --
# confirm against the call sites that use this alias.
PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes |
dt.date | dt.datetime | dt.time | dt.timedelta)

# Type variables used by the generic alias and protocol in this module.
# _V is declared covariant.
_T = TypeVar("_T")
_V = TypeVar("_V", covariant=True)

# Either a single value of type _T or a list of such values.
SingleOrList: TypeAlias = list[_T] | _T


class SupportEq(Protocol):
    """Structural type for objects that define ``__eq__``."""

    # ``other`` is explicitly ``Any`` (it was implicitly so while unannotated)
    # so the stub is fully annotated without narrowing what matches.
    def __eq__(self, other: Any) -> bool: ...


class SupportLt(Protocol):
    """Structural type for objects that define ``__lt__``."""

    # ``other`` is explicitly ``Any`` (it was implicitly so while unannotated)
    # so the stub is fully annotated without narrowing what matches.
    def __lt__(self, other: Any) -> bool: ...


class SupportGt(Protocol):
    """Structural type for objects that define ``__gt__``."""

    # ``other`` is explicitly ``Any`` (it was implicitly so while unannotated)
    # so the stub is fully annotated without narrowing what matches.
    def __gt__(self, other: Any) -> bool: ...


class SupportLe(Protocol):
    """Structural type for objects that define ``__le__``."""

    # ``other`` is explicitly ``Any`` (it was implicitly so while unannotated)
    # so the stub is fully annotated without narrowing what matches.
    def __le__(self, other: Any) -> bool: ...


class SupportGe(Protocol):
    """Structural type for objects that define ``__ge__``."""

    # ``other`` is explicitly ``Any`` (it was implicitly so while unannotated)
    # so the stub is fully annotated without narrowing what matches.
    def __ge__(self, other: Any) -> bool: ...


# A ``(str, operator, value)`` triple. The first five variants pair each
# comparison operator literal with a value type exposing the matching dunder
# method, and the "in"/"not in" variant requires a Collection value.
# NOTE: the final variant accepts any operator string with any value, so the
# narrower variants document intent but do not restrict what type-checks.
FilterTuple: TypeAlias = (
tuple[str, Literal["=", "==", "!="], SupportEq]
| tuple[str, Literal["<"], SupportLt]
| tuple[str, Literal[">"], SupportGt]
| tuple[str, Literal["<="], SupportLe]
| tuple[str, Literal[">="], SupportGe]
| tuple[str, Literal["in", "not in"], Collection]
| tuple[str, str, Any] # Allow general str for operator to avoid type errors
)


class Buffer(Protocol):
    """Memberless structural type: it declares nothing, so any object matches.

    NOTE(review): presumably a placeholder for buffer-like objects -- confirm
    the intended members before relying on it for checking.
    """


class SupportPyBuffer(Protocol):
    """Memberless structural type: it declares nothing, so any object matches.

    NOTE(review): presumably a placeholder for objects exposing the Python
    buffer protocol -- confirm the intended members.
    """


class SupportArrowStream(Protocol):
    """Structural type for objects that define ``__arrow_c_stream__``."""

    # ``requested_schema`` is optional (defaults to None); it is typed ``Any``
    # explicitly so the stub is fully annotated without narrowing what matches.
    def __arrow_c_stream__(self, requested_schema: Any = None) -> Any: ...


class SupportPyArrowArray(Protocol):
    """Structural type for objects that define ``__arrow_array__``."""

    # ``type`` is optional (defaults to None); it is typed ``Any`` explicitly
    # so the stub is fully annotated without narrowing what matches. The
    # parameter name is part of the protocol and therefore kept as is.
    def __arrow_array__(self, type: Any = None) -> Any: ...


class SupportArrowArray(Protocol):
    """Structural type for objects that define ``__arrow_c_array__``."""

    # ``requested_schema`` is optional (defaults to None); it is typed ``Any``
    # explicitly so the stub is fully annotated without narrowing what matches.
    def __arrow_c_array__(self, requested_schema: Any = None) -> Any: ...


class SupportArrowDeviceArray(Protocol):
    """Structural type for objects that define ``__arrow_c_device_array__``."""

    # ``requested_schema`` is optional (defaults to None) and arbitrary extra
    # keyword arguments are accepted; both are typed ``Any`` explicitly so the
    # stub is fully annotated without narrowing what matches.
    def __arrow_c_device_array__(
        self, requested_schema: Any = None, **kwargs: Any
    ) -> Any: ...


class SupportArrowSchema(Protocol):
    """Structural type for objects that define ``__arrow_c_schema__``."""

    def __arrow_c_schema__(self) -> Any:
        ...


class NullableCollection(Protocol[_V]):  # type: ignore[reportInvalidTypeVarUse]
    """Structural type for a sized container that supports iteration and ``in``.

    Iteration is declared to yield either ``_V`` items only or ``_V`` items
    mixed with ``None``.
    """

    def __iter__(self) -> Iterator[_V] | Iterator[_V | None]:
        ...

    def __len__(self) -> int:
        ...

    def __contains__(self, item: Any, /) -> bool:
        ...
Loading
Loading