Skip to content
Open
229 changes: 185 additions & 44 deletions lang/collect/collect.go

Large diffs are not rendered by default.

147 changes: 114 additions & 33 deletions lang/collect/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"

"github.com/cloudwego/abcoder/lang/log"
Expand All @@ -41,20 +42,33 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine {
rel = filepath.Base(loc.URI.File())
}
fileURI := string(loc.URI)
if c.cli == nil {
return uniast.FileLine{File: rel, Line: loc.Range.Start.Line + 1}
}
f := c.cli.GetFile(loc.URI)
filePath := loc.URI.File()

text := ""
if f != nil {
text = f.Text
} else {
fd, err := os.ReadFile(loc.URI.File())
// 1. Try LSP client files
if c.cli != nil {
if f := c.cli.GetFile(loc.URI); f != nil {
text = f.Text
}
}

// 2. Try internal cache
if text == "" {
if cached, ok := c.fileContentCache[filePath]; ok {
text = cached
}
}

// 3. Fallback to OS ReadFile and update cache
if text == "" {
fd, err := os.ReadFile(filePath)
if err != nil {
return uniast.FileLine{File: rel, Line: loc.Range.Start.Line + 1}
}
text = string(fd)
c.fileContentCache[filePath] = text
}

return uniast.FileLine{
File: rel,
Line: loc.Range.Start.Line + 1,
Expand Down Expand Up @@ -96,14 +110,32 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) {
}

// not allow local symbols inside another symbol
c.filterLocalSymbols()
log.Info("Export: filtering local symbols...\n")

//c.filterLocalSymbols()
c.filterLocalSymbolsByCache()

// Pre-compute receivers map to avoid O(N^2) complexity in exportSymbol recursion
log.Info("Export: pre-computing receivers map...\n")
c.receivers = make(map[*DocumentSymbol][]*DocumentSymbol, len(c.funcs)/4)
for method, rec := range c.funcs {
if (method.Kind == SKMethod) && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
c.receivers[rec.Method.Receiver.Symbol] = append(c.receivers[rec.Method.Receiver.Symbol], method)
}

if (method.Kind == SKFunction && c.Language == uniast.Java) && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
c.receivers[rec.Method.Receiver.Symbol] = append(c.receivers[rec.Method.Receiver.Symbol], method)
}
}

// export symbols
log.Info("Export: exporting %d symbols...\n", len(c.syms))
visited := make(map[*DocumentSymbol]*uniast.Identity)
for _, symbol := range c.syms {
_, _ = c.exportSymbol(&repo, symbol, "", visited)
}

log.Info("Export: connecting files to packages...\n")
for fp, f := range c.files {
rel, err := filepath.Rel(c.repo, fp)
if err != nil {
Expand Down Expand Up @@ -162,6 +194,69 @@ func (c *Collector) filterLocalSymbols() {
}
}

// filterLocalSymbolsByCache removes from c.syms every symbol whose location is
// included in the location of another kept symbol of the same file, unless the
// symbol's kind is listed in c.spec.ProtectedSymbolKinds().
//
// Symbols are grouped by file URI and sorted by start position (ascending) and
// then end position (descending), so an enclosing symbol is always visited
// before the symbols it contains. Kept symbols are tracked on a stack of
// candidate parents; parents that end before the current symbol starts are
// popped from the top of the stack, because the sort order guarantees they
// cannot contain the current symbol or any later one.
func (c *Collector) filterLocalSymbolsByCache() {
	if len(c.syms) == 0 {
		return
	}

	// Group symbols by file URI to reduce comparison scope.
	symsByFile := make(map[DocumentURI][]*DocumentSymbol)
	for loc, sym := range c.syms {
		symsByFile[loc.URI] = append(symsByFile[loc.URI], sym)
	}

	for _, fileSyms := range symsByFile {
		if len(fileSyms) <= 1 {
			continue
		}

		// Sort symbols in the same file:
		//  1. by start position (ascending)
		//  2. by end position (descending) - larger range first
		// This ensures that if symbol A contains symbol B, A appears before B.
		sort.Slice(fileSyms, func(i, j int) bool {
			a, b := fileSyms[i].Location.Range, fileSyms[j].Location.Range
			switch {
			case a.Start.Line != b.Start.Line:
				return a.Start.Line < b.Start.Line
			case a.Start.Character != b.Start.Character:
				return a.Start.Character < b.Start.Character
			case a.End.Line != b.End.Line:
				return a.End.Line > b.End.Line
			default:
				return a.End.Character > b.End.Character
			}
		})

		// Stack of kept symbols that may still enclose upcoming symbols.
		var activeParents []*DocumentSymbol
		for _, sym := range fileSyms {
			start := sym.Location.Range.Start

			// Drop parents on top of the stack that ended before this symbol
			// starts: later symbols start at or after `start`, so those parents
			// can never include anything again.
			for n := len(activeParents); n > 0 && activeParents[n-1].Location.Range.End.Less(start); n = len(activeParents) {
				activeParents = activeParents[:n-1]
			}

			isNested := false
			for i := len(activeParents) - 1; i >= 0; i-- {
				if !activeParents[i].Location.Include(sym.Location) {
					continue
				}
				// An enclosing parent exists. The protected-kind check depends
				// only on sym, so its result is the same for every parent and
				// the scan can stop here either way.
				isNested = !utils.Contains(c.spec.ProtectedSymbolKinds(), sym.Kind)
				break
			}

			if isNested {
				delete(c.syms, sym.Location)
				continue
			}
			activeParents = append(activeParents, sym)
		}
	}
}

func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol, refName string, visited map[*DocumentSymbol]*uniast.Identity) (id *uniast.Identity, e error) {
defer func() {
if e != nil && e != ErrStdSymbol && e != ErrExternalSymbol {
Expand Down Expand Up @@ -207,29 +302,18 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
return
}

// Java IPC mode: external/JDK/third-party symbols are exported as one-layer stub identities,
// and MUST NOT create module/package entries in repo.
//// Java IPC mode: external/JDK/third-party symbols
//// For external symbols, we set the module and continue with normal export flow
isJavaIPC := c.Language == uniast.Java && c.javaIPC != nil

if isJavaIPC && !c.internal(symbol.Location) {
name := symbol.Name
if name == "" {
if refName == "" {
e = fmt.Errorf("both symbol %v name and refname is empty", symbol)
return
}
name = refName
}
m := "@external"
// Determine module name based on URI path
fp := symbol.Location.URI.File()
if strings.Contains(fp, "abcoder-jdk") {
m = "@jdk"
} else if strings.Contains(fp, "abcoder-third") {
m = "@third"
mod = "jdk"
} else if strings.Contains(fp, "abcoder-unknown") {
mod = "unknown"
}
tmp := uniast.NewIdentity(m, "external", name)
id = &tmp
visited[symbol] = id
return id, nil
}
if !c.NeedStdSymbol && mod == "" {
e = ErrStdSymbol
Expand Down Expand Up @@ -290,12 +374,8 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
}

// map receiver to methods
receivers := make(map[*DocumentSymbol][]*DocumentSymbol, len(c.funcs)/4)
for method, rec := range c.funcs {
if method.Kind == SKMethod && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
receivers[rec.Method.Receiver.Symbol] = append(receivers[rec.Method.Receiver.Symbol], method)
}
}
// Using pre-computed receivers map from c.receivers
receivers := c.receivers

switch k := symbol.Kind; k {
// Function
Expand Down Expand Up @@ -442,6 +522,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
Exported: public,
}
// collect deps
// collect deps
if deps := c.deps[symbol]; deps != nil {
for _, dep := range deps {
tok := ""
Expand Down
Binary file modified lang/java/ipc/abcoder-java-analyzer-1.0-SNAPSHOT.jar
Binary file not shown.
72 changes: 33 additions & 39 deletions lang/java/ipc/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,6 @@ func NewConverter(repoPath string, moduleName string) *Converter {
return c
}

// ConvertResponses converts a list of streamed Java Parser responses into a
// UniAST Repository. Responses are processed in order; if processing one of
// them fails, the repository built so far is returned together with that error.
func ConvertResponses(repoPath string, moduleName string, responses []*pb.AnalyzeResponse) (*uniast.Repository, error) {
	converter := NewConverter(repoPath, moduleName)
	for i := range responses {
		err := converter.ProcessResponse(responses[i])
		if err == nil {
			continue
		}
		return converter.Repository(), err
	}
	return converter.Repository(), nil
}

// Repository returns the converted UniAST repository
func (c *Converter) Repository() *uniast.Repository {
return c.repo
Expand Down Expand Up @@ -125,42 +114,47 @@ func (c *Converter) processClassInfo(info *pb.ClassInfo) error {
if err != nil {
return err
}
for _, dep := range info.Dependencies {
if dep.SourceType == pb.SourceType_SOURCE_TYPE_JDK && dep.ClassName != "" {
if _, ok := c.JdkClassCache[dep.ClassName]; !ok {
depPoint := &pb.ClassInfo{
ClassName: dep.ClassName,
Source: &pb.SourceInfo{
Type: pb.SourceType_SOURCE_TYPE_JDK,
},
return nil
}

func (c *Converter) ProcessClassDepInfo() error {
for _, info := range c.LocalClassCache {
for _, dep := range info.Dependencies {
if dep.SourceType == pb.SourceType_SOURCE_TYPE_JDK && dep.ClassName != "" {
if _, ok := c.JdkClassCache[dep.ClassName]; !ok {
depPoint := &pb.ClassInfo{
ClassName: dep.ClassName,
Source: &pb.SourceInfo{
Type: pb.SourceType_SOURCE_TYPE_JDK,
},
}
putCache(depPoint, c)
}
putCache(depPoint, c)
}
}
if dep.SourceType == pb.SourceType_SOURCE_TYPE_UNKNOWN && dep.ClassName != "" {
if _, ok := c.UnknowClassCache[dep.ClassName]; !ok {
depPoint := &pb.ClassInfo{
ClassName: dep.ClassName,
Source: &pb.SourceInfo{
Type: pb.SourceType_SOURCE_TYPE_UNKNOWN,
},
if dep.SourceType == pb.SourceType_SOURCE_TYPE_UNKNOWN && dep.ClassName != "" {
if _, ok := c.UnknowClassCache[dep.ClassName]; !ok {
depPoint := &pb.ClassInfo{
ClassName: dep.ClassName,
Source: &pb.SourceInfo{
Type: pb.SourceType_SOURCE_TYPE_UNKNOWN,
},
}
putCache(depPoint, c)
}
putCache(depPoint, c)
}
}
if (dep.SourceType == pb.SourceType_SOURCE_TYPE_MAVEN || dep.SourceType == pb.SourceType_SOURCE_TYPE_EXTERNAL_JAR) && dep.ClassName != "" {
if _, ok := c.ThirdPartClassCache[dep.ClassName]; !ok {
depPoint := &pb.ClassInfo{
ClassName: dep.ClassName,
Source: &pb.SourceInfo{
Type: pb.SourceType_SOURCE_TYPE_MAVEN,
},
if (dep.SourceType == pb.SourceType_SOURCE_TYPE_MAVEN || dep.SourceType == pb.SourceType_SOURCE_TYPE_EXTERNAL_JAR) && dep.ClassName != "" {
if _, ok := c.ThirdPartClassCache[dep.ClassName]; !ok {
depPoint := &pb.ClassInfo{
ClassName: dep.ClassName,
Source: &pb.SourceInfo{
Type: pb.SourceType_SOURCE_TYPE_MAVEN,
},
}
putCache(depPoint, c)
}
putCache(depPoint, c)
}
}
}

return nil
}

Expand Down
2 changes: 1 addition & 1 deletion lang/java/ipc/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (s *JavaParserServer) Start(ctx context.Context, repoPath string, analyzerC
return nil, fmt.Errorf("failed to create socket listener: %w", err)
}

// Step 2: Start Java subprocess
//Step 2: Start Java subprocess
if err := s.startJavaProcess(ctx); err != nil {
s.cleanup()
return nil, fmt.Errorf("failed to start Java process: %w", err)
Expand Down
51 changes: 49 additions & 2 deletions lang/java/lib_ipc.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func DefaultParserConfig() *ParserConfig {
}

return &ParserConfig{
ResolveMavenDependencies: true,
ResolveMavenDependencies: false,
IncludeExternalClasses: false,
Debug: false,
JarPath: jarPath,
Expand Down Expand Up @@ -104,9 +104,52 @@ func ParseRepositoryByIpc(ctx context.Context, repoPath string, config *ParserCo
// Create analyzer config
analyzerConfig := &pb.AnalyzerConfig{
ResolveMavenDependencies: config.ResolveMavenDependencies,
M2RepositoryPath: config.M2RepositoryPath,
ExtraJarPaths: config.ExtraJarPaths,
IncludeExternalClasses: config.IncludeExternalClasses,
ExtraConfig: make(map[string]string),
}

if config.ResolveMavenDependencies {
m2RepositoryPath := os.Getenv("MAVEN_M2_REPOSITORY_PATH")
settingsFilePath := os.Getenv("MAVEN_SETTINGS_FILE_PATH")
java8Home := os.Getenv("JAVA_8_HOME_PATH")
java11Home := os.Getenv("JAVA_11_HOME_PATH")
java17Home := os.Getenv("JAVA_17_HOME_PATH")
java21Home := os.Getenv("JAVA_21_HOME_PATH")
java25Home := os.Getenv("JAVA_25_HOME_PATH")

analyzerConfig.ExtraConfig["maven.enabled"] = "true"
if m2RepositoryPath != "" {
analyzerConfig.ExtraConfig["maven.m2RepositoryPath"] = m2RepositoryPath
}
if settingsFilePath != "" {
analyzerConfig.ExtraConfig["maven.settingsFilePath"] = settingsFilePath
}
if java8Home != "" {
analyzerConfig.ExtraConfig["maven.java8Home"] = java8Home
}
if java11Home != "" {
analyzerConfig.ExtraConfig["maven.java11Home"] = java11Home
}
if java17Home != "" {
analyzerConfig.ExtraConfig["maven.java17Home"] = java17Home
}
if java21Home != "" {
analyzerConfig.ExtraConfig["maven.java21Home"] = java21Home
}
if java25Home != "" {
analyzerConfig.ExtraConfig["maven.java25Home"] = java25Home
}
analyzerConfig.ExtraConfig["maven.timeoutSeconds"] = "600"
analyzerConfig.ExtraConfig["maven.includeScopes"] = "compile,runtime"
analyzerConfig.ExtraConfig["maven.excludeScopes"] = "test,provided"
analyzerConfig.ExtraConfig["maven.offlineMode"] = "false"
analyzerConfig.ExtraConfig["maven.skipTests"] = "true"
analyzerConfig.ExtraConfig["maven.installBeforeResolve"] = "true"
}

if config.Debug {
analyzerConfig.ExtraConfig["maven.verbose"] = "true"
}

// Create server and start analysis
Expand All @@ -127,6 +170,10 @@ func ParseRepositoryByIpc(ctx context.Context, repoPath string, config *ParserCo
log.Printf("Warning: error processing response: %v", err)
}
}
// Process class dependencies
if err := converter.ProcessClassDepInfo(); err != nil {
return nil, fmt.Errorf("failed to process class dependencies: %w", err)
}

return converter, nil
}
Loading
Loading