Skip to content

Commit aceed93

Browse files
veeceey and claude committed
test: add tests reproducing UTF-8 encoding issue on non-UTF-8 locales
Add test_utf8_encoding.py that simulates Windows behavior where Path.read_text() / Path.write_text() default to system encoding (e.g. CP1251) instead of UTF-8, causing UnicodeDecodeError with non-ASCII characters (Cyrillic, Chinese, accented). The tests monkeypatch Path methods to raise when encoding is not explicitly specified, verifying all providers (Pep621, Npm, Cargo, Uv) pass encoding="utf-8". Also fix ruff formatting in 3 provider files. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 90c1ad5 commit aceed93

File tree

4 files changed

+279
-12
lines changed

4 files changed

+279
-12
lines changed

commitizen/providers/cargo_provider.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ def set_lock_version(self, version: str) -> None:
7575
continue
7676

7777
cargo_file = Path(path) / "Cargo.toml"
78-
package_content = parse(
79-
cargo_file.read_text(encoding="utf-8")
80-
).get("package", {})
78+
package_content = parse(cargo_file.read_text(encoding="utf-8")).get(
79+
"package", {}
80+
)
8181
if TYPE_CHECKING:
8282
assert isinstance(package_content, dict)
8383
try:

commitizen/providers/npm_provider.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ def get_version(self) -> str:
3636
"""
3737
Get the current version from package.json
3838
"""
39-
package_document = json.loads(
40-
self.package_file.read_text(encoding="utf-8")
41-
)
39+
package_document = json.loads(self.package_file.read_text(encoding="utf-8"))
4240
return self.get_package_version(package_document)
4341

4442
def set_version(self, version: str) -> None:
@@ -59,9 +57,7 @@ def set_version(self, version: str) -> None:
5957
)
6058
if self.shrinkwrap_file.is_file():
6159
shrinkwrap_document = self.set_shrinkwrap_version(
62-
json.loads(
63-
self.shrinkwrap_file.read_text(encoding="utf-8")
64-
),
60+
json.loads(self.shrinkwrap_file.read_text(encoding="utf-8")),
6561
version,
6662
)
6763
self.shrinkwrap_file.write_text(

commitizen/providers/uv_provider.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@ def set_version(self, version: str) -> None:
2626
self.set_lock_version(version)
2727

2828
def set_lock_version(self, version: str) -> None:
29-
pyproject_toml_content = tomlkit.parse(
30-
self.file.read_text(encoding="utf-8")
31-
)
29+
pyproject_toml_content = tomlkit.parse(self.file.read_text(encoding="utf-8"))
3230
project_name = pyproject_toml_content["project"]["name"] # type: ignore[index]
3331
normalized_project_name = canonicalize_name(str(project_name))
3432

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
"""Tests for explicit UTF-8 encoding in file read/write operations.
2+
3+
Reproduces the issue from #1636 where on Windows, Path.read_text() and
4+
Path.write_text() default to the system encoding (e.g. CP1251) rather than
5+
UTF-8, causing UnicodeDecodeError when files contain non-ASCII characters
6+
such as Cyrillic text in commitizen customize options.
7+
8+
The tests monkeypatch Path.read_text/write_text to simulate Windows behavior
9+
by raising UnicodeDecodeError when encoding is not explicitly specified.
10+
"""
11+
12+
from __future__ import annotations
13+
14+
import json
15+
from pathlib import Path
16+
from typing import TYPE_CHECKING
17+
from unittest.mock import patch
18+
19+
import pytest
20+
import tomlkit
21+
22+
from commitizen.providers import get_provider
23+
from commitizen.providers.cargo_provider import CargoProvider
24+
from commitizen.providers.npm_provider import NpmProvider
25+
from commitizen.providers.pep621_provider import Pep621Provider
26+
from commitizen.providers.uv_provider import UvProvider
27+
28+
if TYPE_CHECKING:
29+
from commitizen.config.base_config import BaseConfig
30+
31+
# Non-ASCII content for testing: Cyrillic, Chinese, accented characters
32+
# The trailing "\u0301" is a combining acute accent applied to the final "e".
# NOTE(review): this constant is not referenced anywhere else in this module — confirm whether it is still needed.
NON_ASCII_COMMENT = "# Тестовый комментарий 测试注释 cafe\u0301"
33+
34+
PEP621_TOML_WITH_NON_ASCII = """\
35+
[project]
36+
name = "my-project"
37+
version = "0.1.0"
38+
description = "Описание проекта 项目描述"
39+
"""
40+
41+
PEP621_TOML_EXPECTED = """\
42+
[project]
43+
name = "my-project"
44+
version = "42.1"
45+
description = "Описание проекта 项目描述"
46+
"""
47+
48+
NPM_PACKAGE_WITH_NON_ASCII = """\
49+
{
50+
"name": "my-project",
51+
"version": "0.1.0",
52+
"description": "Описание проекта 项目描述"
53+
}
54+
"""
55+
56+
NPM_PACKAGE_EXPECTED = """\
57+
{
58+
"name": "my-project",
59+
"version": "42.1",
60+
"description": "\\u041e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0435 \\u043f\\u0440\\u043e\\u0435\\u043a\\u0442\\u0430 \\u9879\\u76ee\\u63cf\\u8ff0"
61+
}
62+
"""
63+
64+
CARGO_TOML_WITH_NON_ASCII = """\
65+
[package]
66+
name = "whatever"
67+
version = "0.1.0"
68+
description = "Описание проекта 项目描述"
69+
"""
70+
71+
UV_PYPROJECT_WITH_NON_ASCII = """\
72+
[project]
73+
name = "test-uv"
74+
version = "4.2.1"
75+
description = "Описание проекта 项目描述"
76+
"""
77+
78+
UV_LOCK_WITH_NON_ASCII = """\
79+
version = 1
80+
revision = 1
81+
requires-python = ">=3.13"
82+
83+
[[package]]
84+
name = "test-uv"
85+
version = "4.2.1"
86+
source = { virtual = "." }
87+
"""
88+
89+
90+
def _make_strict_read_text(original_read_text):
91+
"""Wrap Path.read_text to raise UnicodeDecodeError when encoding is not
92+
explicitly set, simulating Windows with a non-UTF-8 default encoding
93+
(e.g. CP1251)."""
94+
95+
def strict_read_text(self, *args, encoding=None, errors=None):
96+
if encoding is None:
97+
raise UnicodeDecodeError(
98+
"charmap",
99+
b"\x98",
100+
0,
101+
1,
102+
"character maps to <undefined>",
103+
)
104+
return original_read_text(self, *args, encoding=encoding, errors=errors)
105+
106+
return strict_read_text
107+
108+
109+
def _make_strict_write_text(original_write_text):
110+
"""Wrap Path.write_text to raise UnicodeEncodeError when encoding is not
111+
explicitly set, simulating Windows with a non-UTF-8 default encoding."""
112+
113+
def strict_write_text(self, data, *args, encoding=None, errors=None, **kwargs):
114+
if encoding is None:
115+
raise UnicodeEncodeError(
116+
"charmap",
117+
data if isinstance(data, str) else "",
118+
0,
119+
1,
120+
"character maps to <undefined>",
121+
)
122+
return original_write_text(
123+
self, data, *args, encoding=encoding, errors=errors, **kwargs
124+
)
125+
126+
return strict_write_text
127+
128+
129+
@pytest.fixture
def _simulate_non_utf8_locale():
    """Patch ``Path`` so that implicit-encoding text I/O fails.

    While the fixture is active, ``Path.read_text`` and ``Path.write_text``
    are replaced by strict wrappers that raise ``UnicodeDecodeError`` /
    ``UnicodeEncodeError`` whenever no encoding is passed. This reproduces
    the Windows behavior described in issue #1636, where the default
    encoding is not UTF-8.
    """
    strict_reader = _make_strict_read_text(Path.read_text)
    strict_writer = _make_strict_write_text(Path.write_text)

    # Both patches are undone automatically when the test finishes.
    with patch.object(Path, "read_text", strict_reader):
        with patch.object(Path, "write_text", strict_writer):
            yield
146+
147+
148+
@pytest.mark.usefixtures("_simulate_non_utf8_locale")
class TestPep621ProviderUtf8:
    """Check Pep621Provider (TomlProvider) against a file with non-ASCII text.

    Every test runs with the strict-encoding fixture active.
    """

    @staticmethod
    def _write_pyproject(config: BaseConfig, chdir: Path) -> Path:
        """Write the non-ASCII provider file and select the pep621 provider."""
        pyproject = chdir / Pep621Provider.filename
        pyproject.write_text(PEP621_TOML_WITH_NON_ASCII, encoding="utf-8")
        config.settings["version_provider"] = "pep621"
        return pyproject

    def test_get_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        self._write_pyproject(config, chdir)

        provider = get_provider(config)

        assert isinstance(provider, Pep621Provider)
        assert provider.get_version() == "0.1.0"

    def test_set_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        pyproject = self._write_pyproject(config, chdir)

        get_provider(config).set_version("42.1")

        assert pyproject.read_text(encoding="utf-8") == PEP621_TOML_EXPECTED

    def test_roundtrip_preserves_non_ascii(self, config: BaseConfig, chdir: Path):
        """Non-ASCII characters must survive a read-modify-write cycle."""
        pyproject = self._write_pyproject(config, chdir)

        get_provider(config).set_version("42.1")
        rewritten = pyproject.read_text(encoding="utf-8")

        assert "Описание проекта" in rewritten
        assert "项目描述" in rewritten
186+
187+
188+
class TestNpmProviderUtf8:
    """Test that NpmProvider handles non-ASCII content."""

    @pytest.mark.usefixtures("_simulate_non_utf8_locale")
    def test_get_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        pkg = chdir / NpmProvider.package_filename
        pkg.write_text(NPM_PACKAGE_WITH_NON_ASCII, encoding="utf-8")
        config.settings["version_provider"] = "npm"

        provider = get_provider(config)
        assert isinstance(provider, NpmProvider)
        assert provider.get_version() == "0.1.0"

    @pytest.mark.usefixtures("_simulate_non_utf8_locale")
    def test_set_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        pkg = chdir / NpmProvider.package_filename
        pkg.write_text(NPM_PACKAGE_WITH_NON_ASCII, encoding="utf-8")
        config.settings["version_provider"] = "npm"

        provider = get_provider(config)
        provider.set_version("42.1")

        result = json.loads(pkg.read_text(encoding="utf-8"))
        assert result["version"] == "42.1"
        # Compare the parsed value rather than raw text so the check holds
        # whether the file stores the characters literally or as \uXXXX escapes.
        original = json.loads(NPM_PACKAGE_WITH_NON_ASCII)
        assert result["description"] == original["description"]
213+
214+
@pytest.mark.usefixtures("_simulate_non_utf8_locale")
class TestCargoProviderUtf8:
    """Check CargoProvider against a Cargo.toml containing non-ASCII text.

    Every test runs with the strict-encoding fixture active.
    """

    @staticmethod
    def _write_manifest(config: BaseConfig, chdir: Path) -> Path:
        """Write the non-ASCII manifest and select the cargo provider."""
        manifest = chdir / CargoProvider.filename
        manifest.write_text(CARGO_TOML_WITH_NON_ASCII, encoding="utf-8")
        config.settings["version_provider"] = "cargo"
        return manifest

    def test_get_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        self._write_manifest(config, chdir)

        provider = get_provider(config)

        assert isinstance(provider, CargoProvider)
        assert provider.get_version() == "0.1.0"

    def test_set_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        manifest = self._write_manifest(config, chdir)

        get_provider(config).set_version("42.1")

        rewritten = manifest.read_text(encoding="utf-8")
        parsed = tomlkit.parse(rewritten)
        assert parsed["package"]["version"] == "42.1"
        assert "Описание проекта" in rewritten
240+
241+
242+
@pytest.mark.usefixtures("_simulate_non_utf8_locale")
class TestUvProviderUtf8:
    """Check UvProvider against project files containing non-ASCII text.

    Every test runs with the strict-encoding fixture active.
    """

    @staticmethod
    def _write_project(config: BaseConfig, chdir: Path) -> tuple[Path, Path]:
        """Write pyproject and lock files, select the uv provider, return both paths."""
        pyproject = chdir / UvProvider.filename
        pyproject.write_text(UV_PYPROJECT_WITH_NON_ASCII, encoding="utf-8")
        lock = chdir / UvProvider.lock_filename
        lock.write_text(UV_LOCK_WITH_NON_ASCII, encoding="utf-8")
        config.settings["version_provider"] = "uv"
        return pyproject, lock

    def test_get_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        self._write_project(config, chdir)

        provider = get_provider(config)

        assert isinstance(provider, UvProvider)
        assert provider.get_version() == "4.2.1"

    def test_set_version_with_non_ascii_content(self, config: BaseConfig, chdir: Path):
        pyproject, lock = self._write_project(config, chdir)

        get_provider(config).set_version("100.0.0")

        pyproject_text = pyproject.read_text(encoding="utf-8")
        assert "100.0.0" in pyproject_text
        assert "Описание проекта" in pyproject_text

        lock_text = lock.read_text(encoding="utf-8")
        assert "100.0.0" in lock_text

0 commit comments

Comments
 (0)