diff --git a/gazelle/MODULE.bazel b/gazelle/MODULE.bazel index cff6341a2b..e9ab9e34f8 100644 --- a/gazelle/MODULE.bazel +++ b/gazelle/MODULE.bazel @@ -8,7 +8,6 @@ bazel_dep(name = "bazel_skylib", version = "1.8.2") bazel_dep(name = "rules_python", version = "0.18.0") bazel_dep(name = "rules_go", version = "0.59.0", repo_name = "io_bazel_rules_go") bazel_dep(name = "gazelle", version = "0.47.0", repo_name = "bazel_gazelle") -bazel_dep(name = "rules_cc", version = "0.0.16") local_path_override( module_name = "rules_python", @@ -23,7 +22,7 @@ use_repo( "com_github_bmatcuk_doublestar_v4", "com_github_emirpasic_gods", "com_github_ghodss_yaml", - "com_github_smacker_go_tree_sitter", + "com_github_odvcencio_gotreesitter", "com_github_stretchr_testify", "in_gopkg_yaml_v2", "org_golang_x_sync", diff --git a/gazelle/deps.bzl b/gazelle/deps.bzl index 7072c6a372..75976d8edd 100644 --- a/gazelle/deps.bzl +++ b/gazelle/deps.bzl @@ -62,6 +62,12 @@ def go_deps(): sum = "h1:X8jg9rRZmJd4yRy7ZeNDRnM+T3ZfHv15JiBJ/avrEXE=", version = "v4.9.1", ) + go_repository( + name = "com_github_creack_pty", + importpath = "github.com/creack/pty", + sum = "h1:uDmaGzcdjhF4i/plgjmEsriH11Y0o7RKapEf/LDaM3w=", + version = "v1.1.9", + ) go_repository( name = "com_github_davecgh_go_spew", importpath = "github.com/davecgh/go-spew", @@ -110,6 +116,24 @@ def go_deps(): sum = "h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=", version = "v0.6.0", ) + go_repository( + name = "com_github_kr_pretty", + importpath = "github.com/kr/pretty", + sum = "h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=", + version = "v0.1.0", + ) + go_repository( + name = "com_github_kr_text", + importpath = "github.com/kr/text", + sum = "h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=", + version = "v0.2.0", + ) + go_repository( + name = "com_github_odvcencio_gotreesitter", + importpath = "github.com/odvcencio/gotreesitter", + sum = "h1:xM+yZ6G63pfT7nriiUvmddmeL5+PCZ5YdMOOOul+lmw=", + version = "v0.17.4", + ) go_repository( name = "com_github_pmezard_go_difflib", importpath = "github.com/pmezard/go-difflib", @@ -117,10 +141,16 @@ def go_deps(): version = "v1.0.0", ) go_repository( - name = "com_github_smacker_go_tree_sitter", - importpath = "github.com/smacker/go-tree-sitter", - sum = "h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4=", - version = "v0.0.0-20240827094217-dd81d9e9be82", + name = "com_github_segmentio_asm", + importpath = "github.com/segmentio/asm", + sum = "h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc=", + version = "v1.1.3", + ) + go_repository( + name = "com_github_segmentio_encoding", + importpath = "github.com/segmentio/encoding", + sum = "h1:WM4IBnxH8B9TakiM2QD5LyNl9JSndh88QbHqVC+Pauc=", + version = "v0.3.4", ) go_repository( name = "com_github_stretchr_objx", @@ -134,11 +164,17 @@ def go_deps(): sum = "h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=", version = "v1.9.0", ) + go_repository( + name = "dev_lsp_go_jsonrpc2", + importpath = "go.lsp.dev/jsonrpc2", + sum = "h1:Pr/YcXJoEOTMc/b6OTmcR1DPJ3mSWl/SWiU1Cct6VmI=", + version = "v0.10.0", + ) go_repository( name = "in_gopkg_check_v1", importpath = "gopkg.in/check.v1", - sum = "h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=", - version = "v0.0.0-20161208181325-20d25e280405", + sum = "h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=", + version = "v1.0.0-20180628173108-788fd7840127", ) go_repository( name = "in_gopkg_yaml_v2", diff --git a/gazelle/go.mod b/gazelle/go.mod index 9ad4951536..6ad1519bc7 100644 --- a/gazelle/go.mod +++ b/gazelle/go.mod @@ -11,7 +11,7 @@ require ( github.com/bmatcuk/doublestar/v4 v4.9.1 github.com/emirpasic/gods v1.18.1 github.com/ghodss/yaml v1.0.0 - github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 + github.com/odvcencio/gotreesitter v0.17.4 github.com/stretchr/testify v1.9.0 golang.org/x/sync v0.11.0 gopkg.in/yaml.v2 v2.4.0 @@ -20,6 +20,7 @@ require ( require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/go-cmp v0.6.0 // indirect + github.com/kr/text v0.2.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/mod v0.23.0 // indirect golang.org/x/sys v0.30.0 // indirect diff --git a/gazelle/go.sum b/gazelle/go.sum index d5fc2b1af9..e182422467 100644 --- a/gazelle/go.sum +++ b/gazelle/go.sum @@ -6,6 +6,7 @@ github.com/bazelbuild/rules_go v0.55.1 h1:cQYGcunY8myOB+0Ym6PGQRhc/milkRcNv0my3X github.com/bazelbuild/rules_go v0.55.1/go.mod h1:T90Gpyq4HDFlsrvtQa2CBdHNJ2P4rAu/uUTmQbanzf0= github.com/bmatcuk/doublestar/v4 v4.9.1 h1:X8jg9rRZmJd4yRy7ZeNDRnM+T3ZfHv15JiBJ/avrEXE= github.com/bmatcuk/doublestar/v4 v4.9.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= @@ -14,10 +15,14 @@ github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/odvcencio/gotreesitter v0.17.4 h1:xM+yZ6G63pfT7nriiUvmddmeL5+PCZ5YdMOOOul+lmw= +github.com/odvcencio/gotreesitter v0.17.4/go.mod h1:MSmkQmznhGkdLcyQxiM813bi014e1Y1cpcDnm50meHs= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4= -github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= @@ -28,8 +33,9 @@ golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/tools/go/vcs v0.1.0-deprecated h1:cOIJqWBl99H1dH5LWizPa+0ImeeJq3t3cJjaeOWUAL4= golang.org/x/tools/go/vcs v0.1.0-deprecated/go.mod h1:zUrvATBAvEI9535oC0yWYsLsHIV4Z7g63sNPVMtuBy8= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel index b988e493c7..dbc70bb653 100644 --- a/gazelle/python/BUILD.bazel +++ b/gazelle/python/BUILD.bazel @@ -41,8 +41,8 @@ go_library( "@com_github_emirpasic_gods//lists/singlylinkedlist", "@com_github_emirpasic_gods//sets/treeset", "@com_github_emirpasic_gods//utils", - "@com_github_smacker_go_tree_sitter//:go-tree-sitter", - "@com_github_smacker_go_tree_sitter//python", + "@com_github_odvcencio_gotreesitter//:gotreesitter", + "@com_github_odvcencio_gotreesitter//grammars", "@org_golang_x_sync//errgroup", ], ) diff --git a/gazelle/python/file_parser.go b/gazelle/python/file_parser.go index e129337e11..875246c68d 100644 --- a/gazelle/python/file_parser.go +++ b/gazelle/python/file_parser.go @@ -22,8 +22,8 @@ import ( "path/filepath" "strings" - sitter "github.com/smacker/go-tree-sitter" - "github.com/smacker/go-tree-sitter/python" + sitter "github.com/odvcencio/gotreesitter" + "github.com/odvcencio/gotreesitter/grammars" ) const ( @@ -39,6 +39,11 @@ const ( sitterNodeTypeImportFromStatement = "import_from_statement" ) +var ( + pythonLanguage = grammars.PythonLanguage() + pythonParserPool = sitter.NewParserPool(pythonLanguage) +) + type ParserOutput struct { FileName string Modules []Module @@ -47,31 +52,27 @@ type ParserOutput struct { } type FileParser struct { - code []byte - relFilepath string - output ParserOutput - inTypeCheckingBlock bool + code []byte + relFilepath string + output ParserOutput + inTypeCheckingBlock bool } func NewFileParser() *FileParser { return &FileParser{} } -// ParseCode instantiates a new tree-sitter Parser and parses the python code, returning -// the tree-sitter RootNode. +// parseTree parses Python code and returns the tree-sitter Tree. // It prints a warning if parsing fails. -func ParseCode(code []byte, path string) (*sitter.Node, error) { - parser := sitter.NewParser() - parser.SetLanguage(python.GetLanguage()) - - tree, err := parser.ParseCtx(context.Background(), nil, code) +func parseTree(code []byte, path string) (*sitter.Tree, error) { + tree, err := pythonParserPool.Parse(code) if err != nil { return nil, err } root := tree.RootNode() if !root.HasError() { - return root, nil + return tree, nil } log.Printf("WARNING: failed to parse %q. The resulting BUILD target may be incorrect.", path) @@ -80,7 +81,7 @@ func ParseCode(code []byte, path string) (*sitter.Node, error) { // failure may be in some part of the code that Gazelle doesn't care about. verbose, envExists := os.LookupEnv("RULES_PYTHON_GAZELLE_VERBOSE") if !envExists || verbose != "1" { - return root, nil + return tree, nil } for i := 0; i < int(root.ChildCount()); i++ { @@ -89,14 +90,25 @@ func ParseCode(code []byte, path string) (*sitter.Node, error) { // Example logs: // gazelle: Parse error at {Row:1 Column:0}: // def search_one_more_level[T](): - log.Printf("Parse error at %+v:\n%+v", child.StartPoint(), child.Content(code)) + log.Printf("Parse error at %+v:\n%+v", child.StartPoint(), child.Text(code)) // Log the internal tree-sitter representation of what was parsed. Eg: // gazelle: The above was parsed as: (ERROR (identifier) (call function: (list (identifier)) arguments: (argument_list))) - log.Printf("The above was parsed as: %v", child.String()) + log.Printf("The above was parsed as: %v", child.SExpr(pythonLanguage)) } } - return root, nil + return tree, nil +} + +// ParseCode instantiates a tree-sitter Parser and parses the python code, returning +// the tree-sitter RootNode. +// It prints a warning if parsing fails. +func ParseCode(code []byte, path string) (*sitter.Node, error) { + tree, err := parseTree(code, path) + if err != nil { + return nil, err + } + return tree.RootNode(), nil } // parseMain returns true if the python file has an `if __name__ == "__main__":` block, @@ -107,16 +119,16 @@ func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { return false } child := node.Child(i) - if child.Type() == sitterNodeTypeIfStatement && - child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" { + if child.Type(pythonLanguage) == sitterNodeTypeIfStatement && + child.Child(1).Type(pythonLanguage) == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type(pythonLanguage) == "==" { statement := child.Child(1) a, b := statement.Child(0), statement.Child(2) // convert "'__main__' == __name__" to "__name__ == '__main__'" - if b.Type() == sitterNodeTypeIdentifier { + if b.Type(pythonLanguage) == sitterNodeTypeIdentifier { a, b = b, a } - if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" && - b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { + if a.Type(pythonLanguage) == sitterNodeTypeIdentifier && a.Text(p.code) == "__name__" && + b.Type(pythonLanguage) == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" { return true } } @@ -127,10 +139,10 @@ func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool { // parseImportStatement parses a node for an import statement, returning a `Module` and a boolean // representing if the parse was OK or not. func parseImportStatement(node *sitter.Node, code []byte) (Module, bool) { - switch node.Type() { + switch node.Type(pythonLanguage) { case sitterNodeTypeDottedName: return Module{ - Name: node.Content(code), + Name: node.Text(code), LineNumber: node.StartPoint().Row + 1, }, true case sitterNodeTypeAliasedImport: @@ -158,7 +170,7 @@ func cleanImportString(s string) string { // an import statement. It updates FileParser.output.Modules with the `module` that the // import represents. func (p *FileParser) parseImportStatements(node *sitter.Node) bool { - if node.Type() == sitterNodeTypeImportStatement { + if node.Type(pythonLanguage) == sitterNodeTypeImportStatement { for j := 1; j < int(node.ChildCount()); j++ { m, ok := parseImportStatement(node.Child(j), p.code) if !ok { @@ -173,8 +185,8 @@ func (p *FileParser) parseImportStatements(node *sitter.Node) bool { } p.output.Modules = append(p.output.Modules, m) } - } else if node.Type() == sitterNodeTypeImportFromStatement { - from := node.Child(1).Content(p.code) + } else if node.Type(pythonLanguage) == sitterNodeTypeImportFromStatement { + from := node.Child(1).Text(p.code) from = cleanImportString(from) // If the import is from the current package, we don't need to add it to the modules i.e. from . import Class1. // If the import is from a different relative package i.e. from .package1 import foo, we need to add it to the modules. @@ -202,8 +214,8 @@ func (p *FileParser) parseImportStatements(node *sitter.Node) bool { // parseComments parses a node for comments, returning true if the node is a comment. // It updates FileParser.output.Comments with the parsed comment. func (p *FileParser) parseComments(node *sitter.Node) bool { - if node.Type() == sitterNodeTypeComment { - p.output.Comments = append(p.output.Comments, Comment(node.Content(p.code))) + if node.Type(pythonLanguage) == sitterNodeTypeComment { + p.output.Comments = append(p.output.Comments, Comment(node.Text(p.code))) return true } return false @@ -217,23 +229,23 @@ func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string // isTypeCheckingBlock returns true if the given node is an `if TYPE_CHECKING:` block. func (p *FileParser) isTypeCheckingBlock(node *sitter.Node) bool { - if node.Type() != sitterNodeTypeIfStatement || node.ChildCount() < 2 { + if node.Type(pythonLanguage) != sitterNodeTypeIfStatement || node.ChildCount() < 2 { return false } condition := node.Child(1) // Handle `if TYPE_CHECKING:` - if condition.Type() == sitterNodeTypeIdentifier && condition.Content(p.code) == "TYPE_CHECKING" { + if condition.Type(pythonLanguage) == sitterNodeTypeIdentifier && condition.Text(p.code) == "TYPE_CHECKING" { return true } // Handle `if typing.TYPE_CHECKING:` - if condition.Type() == "attribute" && condition.ChildCount() >= 3 { + if condition.Type(pythonLanguage) == "attribute" && condition.ChildCount() >= 3 { object := condition.Child(0) attr := condition.Child(2) - if object.Type() == sitterNodeTypeIdentifier && object.Content(p.code) == "typing" && - attr.Type() == sitterNodeTypeIdentifier && attr.Content(p.code) == "TYPE_CHECKING" { + if object.Type(pythonLanguage) == sitterNodeTypeIdentifier && object.Text(p.code) == "typing" && + attr.Type(pythonLanguage) == sitterNodeTypeIdentifier && attr.Text(p.code) == "TYPE_CHECKING" { return true } } @@ -271,11 +283,13 @@ func (p *FileParser) parse(ctx context.Context, node *sitter.Node) { } func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) { - rootNode, err := ParseCode(p.code, p.relFilepath) + tree, err := parseTree(p.code, p.relFilepath) if err != nil { return nil, err } + defer tree.Release() + rootNode := tree.RootNode() p.output.HasMain = p.parseMain(ctx, rootNode) p.parse(ctx, rootNode)