From 886b35251838e52cb42242a59dc94f193b5d8bea Mon Sep 17 00:00:00 2001 From: Thomas Bracht Laumann Jespersen Date: Sat, 28 Mar 2026 12:08:41 +0000 Subject: [PATCH] pkgcheck/bash: sort captures by line and column It has been observed that the order of nodes returned by tree-sitter's QueryCursor is not necessarily in order with respect to line and column in the source, but it appears that some of pkgcheck's usage of tree-sitter assumes that captured nodes are returned in order. There doesn't appear to be any feature in QueryCursor (or other places) that allows one to specify that returned nodes should be ordered in a specific way, so instead we introduce a decorator on QueryCursor that takes the dict of captured nodes and sorts each list of nodes by line and column. Fixes: https://github.com/pkgcore/pkgcheck/issues/702 Signed-off-by: Thomas Bracht Laumann Jespersen --- src/pkgcheck/bash/__init__.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/pkgcheck/bash/__init__.py b/src/pkgcheck/bash/__init__.py index 0b5486b40..020cc4128 100644 --- a/src/pkgcheck/bash/__init__.py +++ b/src/pkgcheck/bash/__init__.py @@ -10,15 +10,39 @@ try: from tree_sitter import QueryCursor - def query(query_str: str) -> "QueryCursor": + def unstable_query(query_str: str) -> "QueryCursor": return QueryCursor(Query(lang, query_str)) except ImportError: # tree-sitter < 0.25 QueryCursor = Query - query = lang.query + unstable_query = lang.query parser = Parser(language=lang) + +class SortedQueryCursor: + """ + Sort query results by line and column. It's been observed that + query results from tree-sitter are not consistently returned in + the same order, so this class acts as a decorator for QueryCursor + to sort the returned captures. 
+ """ + + def __init__(self, query_cursor: QueryCursor): + self._query_cursor = query_cursor + + def captures(self, node): + caps = self._query_cursor.captures(node) + return { + key: sorted(nodes, key=lambda n: (n.start_point.row, n.start_point.column)) + for key, nodes in caps.items() + } + + +def query(query_str: str): + return SortedQueryCursor(unstable_query(query_str)) + + # various parse tree queries cmd_query = query("(command) @call") func_query = query("(function_definition) @func") @@ -39,14 +63,14 @@ def node_str(self, node): """Return the ebuild string associated with a given parse tree node.""" return self.data[node.start_byte : node.end_byte].decode("utf8") - def global_query(self, query: QueryCursor): + def global_query(self, query: QueryCursor | SortedQueryCursor): """Run a given parse tree query returning only those nodes in global scope.""" for x in self.tree.root_node.children: # skip nodes in function scope if x.type != "function_definition": yield from chain.from_iterable(query.captures(x).values()) - def func_query(self, query: QueryCursor): + def func_query(self, query: QueryCursor | SortedQueryCursor): """Run a given parse tree query returning only those nodes in function scope.""" for x in self.tree.root_node.children: # only return nodes in function scope