Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4105,6 +4105,29 @@
},
"sqlState" : "42K0E"
},
"INVALID_LINE_SEPARATOR" : {
"message" : [
"Invalid line separator configuration."
],
"subClass" : {
"EMPTY" : {
"message" : [
"The 'lineSep' option cannot be an empty string."
]
},
"NULL" : {
"message" : [
"The 'lineSep' option cannot be a null value."
]
},
"TOO_LONG" : {
"message" : [
"The 'lineSep' option can contain at most 2 characters, but got <length> characters."
]
}
},
"sqlState" : "22023"
},
"INVALID_LOG_VERSION" : {
"message" : [
"UnsupportedLogVersion."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,15 @@ class CSVOptions(
* A string between two consecutive CSV records.
*/
val lineSeparator: Option[String] = parameters.get(LINE_SEP).map { sep =>
require(sep != null, "'lineSep' cannot be a null value.")
require(sep.nonEmpty, "'lineSep' cannot be an empty string.")
// Intentionally allow it up to 2 for Window's CRLF although multiple
// characters have an issue with quotes. This is intentionally undocumented.
require(sep.length <= 2, "'lineSep' can contain only 1 character.")
if (sep == null) {
throw QueryExecutionErrors.lineSepCannotBeNullError()
}
if (sep.isEmpty) {
throw QueryExecutionErrors.lineSepCannotBeEmptyError()
}
if (sep.length > 2) {
throw QueryExecutionErrors.lineSepTooLongError(sep.length)
}
if (sep.length == 2) logWarning("It is not recommended to set 'lineSep' " +
"with 2 characters due to the limitation of supporting multi-char 'lineSep' within quotes.")
sep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3349,4 +3349,20 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
"expectedFamily" -> expectedFamily,
"actualFamily" -> actualFamily))
}

/**
 * Builds the error reported when the 'lineSep' option is a null value.
 *
 * @return a [[SparkIllegalArgumentException]] carrying the
 *         `INVALID_LINE_SEPARATOR.NULL` error condition
 */
def lineSepCannotBeNullError(): SparkIllegalArgumentException =
  new SparkIllegalArgumentException(errorClass = "INVALID_LINE_SEPARATOR.NULL")

/**
 * Builds the error reported when the 'lineSep' option is an empty string.
 *
 * @return a [[SparkIllegalArgumentException]] carrying the
 *         `INVALID_LINE_SEPARATOR.EMPTY` error condition
 */
def lineSepCannotBeEmptyError(): SparkIllegalArgumentException =
  new SparkIllegalArgumentException(errorClass = "INVALID_LINE_SEPARATOR.EMPTY")

/**
 * Builds the error reported when the 'lineSep' option has too many characters.
 *
 * @param length the number of characters in the rejected separator; passed to
 *               the error message as the `length` parameter
 * @return a [[SparkIllegalArgumentException]] carrying the
 *         `INVALID_LINE_SEPARATOR.TOO_LONG` error condition
 */
def lineSepTooLongError(length: Int): SparkIllegalArgumentException = {
  val params = Map("length" -> length.toString)
  new SparkIllegalArgumentException(
    errorClass = "INVALID_LINE_SEPARATOR.TOO_LONG",
    messageParameters = params)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2484,15 +2484,21 @@ abstract class CSVSuite
// scalastyle:on nonascii

// Verifies that invalid 'lineSep' values (an empty string and a 3-character
// string) are rejected when reading a CSV file.
// NOTE(review): each input is checked twice below, once by matching a message
// substring on IllegalArgumentException and once via checkError on
// SparkIllegalArgumentException. These look like the pre- and post-migration
// versions of the same check; confirm which set is meant to remain.
test("lineSep restrictions") {
// Empty separator: message-substring form of the check.
val errMsg1 = intercept[IllegalArgumentException] {
spark.read.option("lineSep", "").csv(testFile(carsFile)).collect()
}.getMessage
assert(errMsg1.contains("'lineSep' cannot be an empty string"))
// Empty separator: error-condition form of the check; no message parameters expected.
checkError(
exception = intercept[SparkIllegalArgumentException] {
spark.read.option("lineSep", "").csv(testFile(carsFile)).collect()
},
condition = "INVALID_LINE_SEPARATOR.EMPTY",
parameters = Map.empty
)

// 3-character separator: message-substring form of the check.
val errMsg2 = intercept[IllegalArgumentException] {
spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect()
}.getMessage
assert(errMsg2.contains("'lineSep' can contain only 1 character"))
// 3-character separator: error-condition form; the reported length is "3" for "123".
checkError(
exception = intercept[SparkIllegalArgumentException] {
spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect()
},
condition = "INVALID_LINE_SEPARATOR.TOO_LONG",
parameters = Map("length" -> "3")
)
}

Seq(true, false).foreach { multiLine =>
Expand Down