From 872c49bd47f1af1e6dd44cef66002dfda1a25392 Mon Sep 17 00:00:00 2001 From: Shivadarshan Date: Thu, 14 May 2026 14:22:33 +0530 Subject: [PATCH 1/5] Created user error for IllegalArgumentException in CSVOptions --- .../resources/error/error-conditions.json | 23 +++++++++++++++++++ .../spark/sql/catalyst/csv/CSVOptions.scala | 14 +++++++---- .../sql/errors/QueryExecutionErrors.scala | 16 +++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 889ecf9f7b08a..29871f8f2007a 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -11462,5 +11462,28 @@ "" ], "sqlState" : "P0001" + }, + "INVALID_LINE_SEPARATOR" : { + "message" : [ + "Invalid line separator configuration." + ], + "subClass" : { + "NULL" : { + "message" : [ + "The 'lineSep' option cannot be a null value." + ] + }, + "EMPTY" : { + "message" : [ + "The 'lineSep' option cannot be an empty string." + ] + }, + "TOO_LONG" : { + "message" : [ + "The 'lineSep' option can contain at most 2 characters, but got <length> character(s)." + ] + } + }, + "sqlState" : "22023" } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index 9edb1603f4638..4ae7aad19a9c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -284,11 +284,15 @@ class CSVOptions( * A string between two consecutive JSON records. 
*/ val lineSeparator: Option[String] = parameters.get(LINE_SEP).map { sep => - require(sep != null, "'lineSep' cannot be a null value.") - require(sep.nonEmpty, "'lineSep' cannot be an empty string.") - // Intentionally allow it up to 2 for Window's CRLF although multiple - // characters have an issue with quotes. This is intentionally undocumented. - require(sep.length <= 2, "'lineSep' can contain only 1 character.") + if (sep == null) { + throw QueryExecutionErrors.lineSepCannotBeNullError() + } + if (sep.isEmpty) { + throw QueryExecutionErrors.lineSepCannotBeEmptyError() + } + if (sep.length > 2) { + throw QueryExecutionErrors.lineSepTooLongError(sep.length) + } if (sep.length == 2) logWarning("It is not recommended to set 'lineSep' " + "with 2 characters due to the limitation of supporting multi-char 'lineSep' within quotes.") sep diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 0aa8308276871..b97244c1c2805 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -3349,4 +3349,20 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "expectedFamily" -> expectedFamily, "actualFamily" -> actualFamily)) } + + def lineSepCannotBeNullError(): SparkIllegalArgumentException = { + new SparkIllegalArgumentException( + errorClass = "INVALID_LINE_SEPARATOR.NULL") + } + + def lineSepCannotBeEmptyError(): SparkIllegalArgumentException = { + new SparkIllegalArgumentException( + errorClass = "INVALID_LINE_SEPARATOR.EMPTY") + } + + def lineSepTooLongError(length: Int): SparkIllegalArgumentException = { + new SparkIllegalArgumentException( + errorClass = "INVALID_LINE_SEPARATOR.TOO_LONG", + messageParameters = Map("length" -> length.toString)) + } } From 
e5be5f556e21b1bac2c073f5b1db2ac9605a04cb Mon Sep 17 00:00:00 2001 From: Shivadarshan Date: Thu, 14 May 2026 14:53:31 +0530 Subject: [PATCH 2/5] Created user error for IllegalArgumentException in CSVOptions --- common/utils/src/main/resources/error/error-conditions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 29871f8f2007a..6dfe538ededa8 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -11480,7 +11480,7 @@ }, "TOO_LONG" : { "message" : [ - "The 'lineSep' option can contain at most 2 characters, but got <length> character(s)." + "The 'lineSep' option can contain at most 2 characters, but got <length> characters." ] } }, From 9055d7e257c73a89d56d34dae589df9cb436bcfe Mon Sep 17 00:00:00 2001 From: Shivadarshan Date: Thu, 14 May 2026 17:22:44 +0530 Subject: [PATCH 3/5] Added fix for failing tc --- .../resources/error/error-conditions.json | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 6dfe538ededa8..eeefe31ccc204 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -4105,6 +4105,29 @@ }, "sqlState" : "42K0E" }, + "INVALID_LINE_SEPARATOR" : { + "message" : [ + "Invalid line separator configuration." + ], + "subClass" : { + "EMPTY" : { + "message" : [ + "The 'lineSep' option cannot be an empty string." + ] + }, + "NULL" : { + "message" : [ + "The 'lineSep' option cannot be a null value." + ] + }, + "TOO_LONG" : { + "message" : [ + "The 'lineSep' option can contain at most 2 characters, but got <length> characters." 
+ ] + } + }, + "sqlState" : "22023" + }, "INVALID_LOG_VERSION" : { "message" : [ "UnsupportedLogVersion." @@ -11462,28 +11485,5 @@ "" ], "sqlState" : "P0001" - }, - "INVALID_LINE_SEPARATOR" : { - "message" : [ - "Invalid line separator configuration." - ], - "subClass" : { - "NULL" : { - "message" : [ - "The 'lineSep' option cannot be a null value." - ] - }, - "EMPTY" : { - "message" : [ - "The 'lineSep' option cannot be an empty string." - ] - }, - "TOO_LONG" : { - "message" : [ - "The 'lineSep' option can contain at most 2 characters, but got <length> characters." - ] - } - }, - "sqlState" : "22023" } } From b10b54f77a23b8c7716f366ffbddfda8580dab74 Mon Sep 17 00:00:00 2001 From: Shivadarshan Date: Thu, 14 May 2026 19:52:31 +0530 Subject: [PATCH 4/5] Added fix for failing tc --- .../execution/datasources/csv/CSVSuite.scala | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 24f80f4b928f6..2c07adbca88c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -2484,15 +2484,20 @@ abstract class CSVSuite // scalastyle:on nonascii test("lineSep restrictions") { - val errMsg1 = intercept[IllegalArgumentException] { - spark.read.option("lineSep", "").csv(testFile(carsFile)).collect() - }.getMessage - assert(errMsg1.contains("'lineSep' cannot be an empty string")) + checkError( + exception = intercept[SparkIllegalArgumentException] { + spark.read.option("lineSep", "").csv(testFile(carsFile)).collect() + }, + condition = "INVALID_LINE_SEPARATOR.EMPTY" + ) - val errMsg2 = intercept[IllegalArgumentException] { - spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect() - }.getMessage - assert(errMsg2.contains("'lineSep' can 
contain only 1 character")) + checkError( + exception = intercept[SparkIllegalArgumentException] { + spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect() + }, + condition = "INVALID_LINE_SEPARATOR.TOO_LONG", + parameters = Map("length" -> "3") + ) } Seq(true, false).foreach { multiLine => From 3625089fd77ce5d1a6c7a35b63355faca7dc9c99 Mon Sep 17 00:00:00 2001 From: Shivadarshan Date: Thu, 14 May 2026 22:05:44 +0530 Subject: [PATCH 5/5] Added fix for failing tc --- .../apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 2c07adbca88c5..22b291677cd8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -2488,7 +2488,8 @@ abstract class CSVSuite exception = intercept[SparkIllegalArgumentException] { spark.read.option("lineSep", "").csv(testFile(carsFile)).collect() }, - condition = "INVALID_LINE_SEPARATOR.EMPTY" + condition = "INVALID_LINE_SEPARATOR.EMPTY", + parameters = Map.empty ) checkError(