From c487aa1949bd725a9f825451bbfc9bd5160be737 Mon Sep 17 00:00:00 2001 From: Ghraven <115199279+Ghraven@users.noreply.github.com> Date: Wed, 20 May 2026 18:48:39 +0000 Subject: [PATCH] fix: add explicit encoding="utf-8" to .txt read in _validators.py The text-mode open() for user-supplied .txt fine-tuning files relied on the platform default encoding (e.g. cp1252 on many Windows systems), so reading a valid UTF-8 file could raise UnicodeDecodeError or silently corrupt non-ASCII content. Pinning encoding="utf-8" makes the read consistent across platforms. --- src/openai/lib/_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openai/lib/_validators.py b/src/openai/lib/_validators.py index cf24cd2294..a947283641 100644 --- a/src/openai/lib/_validators.py +++ b/src/openai/lib/_validators.py @@ -482,7 +482,7 @@ def read_any_format( elif fname.lower().endswith(".txt"): immediate_msg = "\n- Based on your file extension, you provided a text file" necessary_msg = "Your format `TXT` will be converted to `JSONL`" - with open(fname, "r") as f: + with open(fname, "r", encoding="utf-8") as f: content = f.read() df = pd.DataFrame( [["", line] for line in content.split("\n")],