-
Notifications
You must be signed in to change notification settings - Fork 2
[perf_tool] Add perf_clickhouse GitHub action to test pxl clickhouse read/export #40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c608b62
7ded5bb
46c7386
88763e0
fc172f1
381e958
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,9 @@ namespace px { | |
| namespace carnot { | ||
| namespace exec { | ||
|
|
||
| // TODO(ddelnano): Defend against columns that don't exist. These should be | ||
| // ignored by the Node. | ||
|
|
||
|
Comment on lines
+38
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Address TODO: Defending against missing columns. This TODO notes a valid defensive measure. If unmapped columns are passed, the code should handle them gracefully rather than potentially failing. Would you like me to help implement this defensive check, or should I open an issue to track this task? 🤖 Prompt for AI Agents |
||
| using table_store::schema::RowBatch; | ||
| using table_store::schema::RowDescriptor; | ||
|
|
||
|
|
@@ -148,12 +151,12 @@ Status ClickHouseExportSinkNode::ConsumeNextImpl(ExecState* /*exec_state*/, cons | |
| break; | ||
| } | ||
| case types::UINT128: { | ||
| // UINT128 is exported as STRING (UUID format) | ||
| // UINT128 is exported as STRING in "high:low" format to match | ||
| // the ClickHouseSourceNode's parsing in clickhouse_source_node.cc | ||
| auto col = std::make_shared<clickhouse::ColumnString>(); | ||
| for (int64_t i = 0; i < num_rows; ++i) { | ||
| auto val = types::GetValueFromArrowArray<types::UINT128>(arrow_col.get(), i); | ||
| std::string uuid_str = sole::rebuild(absl::Uint128High64(val), absl::Uint128Low64(val)).str(); | ||
| col->Append(uuid_str); | ||
| col->Append(absl::Substitute("$0:$1", absl::Uint128High64(val), absl::Uint128Low64(val))); | ||
| } | ||
| block.AppendColumn(mapping.clickhouse_column_name(), col); | ||
| break; | ||
|
|
@@ -164,6 +167,34 @@ Status ClickHouseExportSinkNode::ConsumeNextImpl(ExecState* /*exec_state*/, cons | |
| } | ||
| } | ||
|
|
||
| // Auto-derive event_time from time_ if time_ is present but event_time is not. | ||
| // The ClickHouse table schema uses event_time (DateTime64(3), milliseconds) for | ||
| // partitioning and ordering, but the Pixie table has time_ (TIME64NS, nanoseconds). | ||
| bool has_time_ = false; | ||
| bool has_event_time = false; | ||
| int time_col_index = -1; | ||
| for (const auto& mapping : plan_node_->column_mappings()) { | ||
| if (mapping.clickhouse_column_name() == "time_") { | ||
| has_time_ = true; | ||
| time_col_index = mapping.input_column_index(); | ||
| } | ||
| if (mapping.clickhouse_column_name() == "event_time") { | ||
| has_event_time = true; | ||
| } | ||
| } | ||
|
|
||
| if (has_time_ && !has_event_time && time_col_index >= 0) { | ||
| auto arrow_col = rb.ColumnAt(time_col_index); | ||
| int64_t num_rows = arrow_col->length(); | ||
| auto event_time_col = std::make_shared<clickhouse::ColumnDateTime64>(3); | ||
| for (int64_t i = 0; i < num_rows; ++i) { | ||
| int64_t ns_val = types::GetValueFromArrowArray<types::TIME64NS>(arrow_col.get(), i); | ||
| // Convert nanoseconds to milliseconds for DateTime64(3) | ||
| event_time_col->Append(ns_val / 1000000LL); | ||
| } | ||
| block.AppendColumn("event_time", event_time_col); | ||
| } | ||
|
Comment on lines
+170
to
+196
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing type verification before casting to TIME64NS. The event_time derivation assumes the column mapped to "time_" is 🛡️ Proposed fix to verify column type bool has_time_ = false;
bool has_event_time = false;
int time_col_index = -1;
+ types::DataType time_col_type = types::DATA_TYPE_UNKNOWN;
for (const auto& mapping : plan_node_->column_mappings()) {
if (mapping.clickhouse_column_name() == "time_") {
has_time_ = true;
time_col_index = mapping.input_column_index();
+ time_col_type = mapping.column_type();
}
if (mapping.clickhouse_column_name() == "event_time") {
has_event_time = true;
}
}
- if (has_time_ && !has_event_time && time_col_index >= 0) {
+ if (has_time_ && !has_event_time && time_col_index >= 0 &&
+ time_col_type == types::TIME64NS) {
auto arrow_col = rb.ColumnAt(time_col_index);🤖 Prompt for AI Agents |
||
|
|
||
| // Insert the block into ClickHouse | ||
| clickhouse_client_->Insert(plan_node_->table_name(), block); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: k8sstormcenter/pixie
Length of output: 582
Remove or upgrade forced protobuf version; current pin is vulnerable.
Line 324 forces
google.golang.org/protobuftov1.29.1, which contains a moderate-severity vulnerability (GHSA-8r3f-844c-mc37: infinite loop in JSON unmarshaling). This override defeats the safer indirect version at Line 282 and should be removed or bumped to a patched release consistently across Go/Bazel dependency pins.🤖 Prompt for AI Agents