Skip to content

Commit d81d6fc

Browse files
adonovangopherbot
authored andcommitted
gopls/internal/util/asm: better assembly parsing
This CL adds a rudimentary parser for symbols in Go .s files. It is a placeholder for a more principled implementation, but it is sufficient to make Definition support control labels (also in this CL) and for a cross-references index (future work). + test of Definition on control label + test of asm.Parse Updates golang/go#71754 Change-Id: I2ff19b4ade130c051197d6b097a1a3dbcd95555a Reviewed-on: https://go-review.googlesource.com/c/tools/+/654335 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Jonathan Amsterdam <[email protected]> Auto-Submit: Alan Donovan <[email protected]>
1 parent 455db21 commit d81d6fc

File tree

5 files changed

+353
-24
lines changed

5 files changed

+353
-24
lines changed

gopls/internal/goasm/definition.go

+35-24
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5+
// Package goasm provides language-server features for files in Go
6+
// assembly language (https://go.dev/doc/asm).
57
package goasm
68

79
import (
8-
"bytes"
910
"context"
1011
"fmt"
1112
"go/token"
12-
"strings"
13-
"unicode"
1413

1514
"golang.org/x/tools/gopls/internal/cache"
1615
"golang.org/x/tools/gopls/internal/cache/metadata"
1716
"golang.org/x/tools/gopls/internal/file"
1817
"golang.org/x/tools/gopls/internal/protocol"
18+
"golang.org/x/tools/gopls/internal/util/asm"
1919
"golang.org/x/tools/gopls/internal/util/morestrings"
2020
"golang.org/x/tools/internal/event"
2121
)
@@ -41,21 +41,27 @@ func Definition(ctx context.Context, snapshot *cache.Snapshot, fh file.Handle, p
4141
return nil, err
4242
}
4343

44+
// Parse the assembly.
45+
//
46+
// TODO(adonovan): make this just another
47+
// attribute of the type-checked cache.Package.
48+
file := asm.Parse(content)
49+
4450
// Figure out the selected symbol.
4551
// For now, just find the identifier around the cursor.
46-
//
47-
// TODO(adonovan): use a real asm parser; see cmd/asm/internal/asm/parse.go.
48-
// Ideally this would just be just another attribute of the
49-
// type-checked cache.Package.
50-
nonIdentRune := func(r rune) bool { return !isIdentRune(r) }
51-
i := bytes.LastIndexFunc(content[:offset], nonIdentRune)
52-
j := bytes.IndexFunc(content[offset:], nonIdentRune)
53-
if j < 0 || j == 0 {
54-
return nil, nil // identifier runs to EOF, or not an identifier
52+
var found *asm.Ident
53+
for _, id := range file.Idents {
54+
if id.Offset <= offset && offset <= id.End() {
55+
found = &id
56+
break
57+
}
5558
}
56-
sym := string(content[i+1 : offset+j])
57-
sym = strings.ReplaceAll(sym, "·", ".") // (U+00B7 MIDDLE DOT)
58-
sym = strings.ReplaceAll(sym, "∕", "/") // (U+2215 DIVISION SLASH)
59+
if found == nil {
60+
return nil, fmt.Errorf("not an identifier")
61+
}
62+
63+
// Resolve a symbol with a "." prefix to the current package.
64+
sym := found.Name
5965
if sym != "" && sym[0] == '.' {
6066
sym = string(mp.PkgPath) + sym
6167
}
@@ -92,18 +98,23 @@ func Definition(ctx context.Context, snapshot *cache.Snapshot, fh file.Handle, p
9298
if err == nil {
9399
return []protocol.Location{loc}, nil
94100
}
95-
}
96101

97-
// TODO(adonovan): support jump to var, block label, and other
98-
// TEXT, DATA, and GLOBAL symbols in the same file. Needs asm parser.
102+
} else {
103+
// local symbols (funcs, vars, labels)
104+
for _, id := range file.Idents {
105+
if id.Name == found.Name &&
106+
(id.Kind == asm.Text || id.Kind == asm.Global || id.Kind == asm.Label) {
99107

100-
return nil, nil
101-
}
108+
loc, err := mapper.OffsetLocation(id.Offset, id.End())
109+
if err != nil {
110+
return nil, err
111+
}
112+
return []protocol.Location{loc}, nil
113+
}
114+
}
115+
}
102116

103-
// The assembler allows center dot (· U+00B7) and
104-
// division slash (∕ U+2215) to work as identifier characters.
105-
func isIdentRune(r rune) bool {
106-
return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '·' || r == '∕'
117+
return nil, nil
107118
}
108119

109120
// TODO(rfindley): avoid the duplicate column mapping here, by associating a

gopls/internal/golang/assembly.go

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ package golang
1010
// - ./codeaction.go - computes the symbol and offers the CodeAction command.
1111
// - ../server/command.go - handles the command by opening a web page.
1212
// - ../server/server.go - handles the HTTP request and calls this function.
13+
//
14+
// For language-server behavior in Go assembly language files,
15+
// see [golang.org/x/tools/gopls/internal/goasm].
1316

1417
import (
1518
"bytes"

gopls/internal/test/marker/testdata/definition/asm.txt

+3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ var _ = ff // pacify unusedfunc analyzer
2626
TEXT ·ff(SB), $16 //@ loc(ffasm, "ff"), def("ff", ffgo)
2727
CALL example·com∕b·B //@ def("com", bB)
2828
JMP ·ff //@ def("ff", ffgo)
29+
JMP label //@ def("label", label)
30+
label: //@ loc(label,"label")
31+
RET
2932

3033
-- b/b.go --
3134
package b

gopls/internal/util/asm/parse.go

+245
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package asm provides a simple parser for Go assembly files.
6+
package asm
7+
8+
import (
9+
"bufio"
10+
"bytes"
11+
"fmt"
12+
"strings"
13+
"unicode"
14+
)
15+
16+
// Kind describes the nature of an identifier in an assembly file.
type Kind uint8

const (
	Invalid Kind = iota // reserved zero value; not used by Ident
	Ref                 // arbitrary reference to symbol or control label
	Text                // definition of TEXT (function) symbol
	Global              // definition of GLOBL (var) symbol
	Data                // initialization of GLOBL (var) symbol; effectively a reference
	Label               // definition of control label
)

// String returns the lower-case name of the kind (e.g. "text"),
// or "Kind(N)" for a value outside the declared range.
func (k Kind) String() string {
	if int(k) < len(kindString) {
		return kindString[k]
	}
	return fmt.Sprintf("Kind(%d)", k)
}

// kindString holds the printed name of each declared Kind, indexed by Kind.
var kindString = [...]string{
	Invalid: "invalid",
	Ref:     "ref",
	Text:    "text",
	Global:  "global",
	Data:    "data",
	Label:   "label",
}

// A File represents a parsed file of Go assembly language.
type File struct {
	Idents []Ident

	// TODO(adonovan): use token.File? This may be important in a
	// future in which analyzers can report diagnostics in .s files.
}

// Ident represents an identifier in an assembly file.
type Ident struct {
	Name   string // symbol name (after correcting [·∕]); Name[0]='.' => current package
	Offset int    // zero-based byte offset
	Kind   Kind
}

// End returns the identifier's end offset, that is, the byte offset
// just past the identifier as it is spelled in the source file.
//
// Name is stored in linker syntax, in which each "·" (U+00B7, two
// bytes of UTF-8) has become "." and each "∕" (U+2215, three bytes)
// has become "/". Neither ASCII character is valid in an assembly
// identifier, so every "." or "/" in Name stands for its longer
// source spelling, and the length is adjusted accordingly.
func (id Ident) End() int {
	n := len(id.Name)
	n += strings.Count(id.Name, ".")     // "·" is one byte longer than "."
	n += 2 * strings.Count(id.Name, "/") // "∕" is two bytes longer than "/"
	return id.Offset + n
}
61+
62+
// Parse extracts identifiers from Go assembly files.
63+
// Since it is a best-effort parser, it never returns an error.
64+
func Parse(content []byte) *File {
65+
var idents []Ident
66+
offset := 0 // byte offset of start of current line
67+
68+
// TODO(adonovan) use a proper tokenizer that respects
69+
// comments, string literals, line continuations, etc.
70+
scan := bufio.NewScanner(bytes.NewReader(content))
71+
for ; scan.Scan(); offset += len(scan.Bytes()) + len("\n") {
72+
line := scan.Text()
73+
74+
// Strip comments.
75+
if idx := strings.Index(line, "//"); idx >= 0 {
76+
line = line[:idx]
77+
}
78+
79+
// Skip blank lines.
80+
if strings.TrimSpace(line) == "" {
81+
continue
82+
}
83+
84+
// Check for label definitions (ending with colon).
85+
if colon := strings.IndexByte(line, ':'); colon > 0 {
86+
label := strings.TrimSpace(line[:colon])
87+
if isIdent(label) {
88+
idents = append(idents, Ident{
89+
Name: label,
90+
Offset: offset + strings.Index(line, label),
91+
Kind: Label,
92+
})
93+
continue
94+
}
95+
}
96+
97+
// Split line into words.
98+
words := strings.Fields(line)
99+
if len(words) == 0 {
100+
continue
101+
}
102+
103+
// A line of the form
104+
// TEXT ·sym<ABIInternal>(SB),NOSPLIT,$12
105+
// declares a text symbol "·sym".
106+
if len(words) > 1 {
107+
kind := Invalid
108+
switch words[0] {
109+
case "TEXT":
110+
kind = Text
111+
case "GLOBL":
112+
kind = Global
113+
case "DATA":
114+
kind = Data
115+
}
116+
if kind != Invalid {
117+
sym := words[1]
118+
sym = cutBefore(sym, ",") // strip ",NOSPLIT,$12" etc
119+
sym = cutBefore(sym, "(") // "sym(SB)" -> "sym"
120+
sym = cutBefore(sym, "<") // "sym<ABIInternal>" -> "sym"
121+
sym = strings.TrimSpace(sym)
122+
if isIdent(sym) {
123+
// (The Index call assumes sym is not itself "TEXT" etc.)
124+
idents = append(idents, Ident{
125+
Name: cleanup(sym),
126+
Kind: kind,
127+
Offset: offset + strings.Index(line, sym),
128+
})
129+
}
130+
continue
131+
}
132+
}
133+
134+
// Find references in the rest of the line.
135+
pos := 0
136+
for _, word := range words {
137+
// Find actual position of word within line.
138+
tokenPos := strings.Index(line[pos:], word)
139+
if tokenPos < 0 {
140+
panic(line)
141+
}
142+
tokenPos += pos
143+
pos = tokenPos + len(word)
144+
145+
// Reject probable instruction mnemonics (e.g. MOV).
146+
if len(word) >= 2 && word[0] != '·' &&
147+
!strings.ContainsFunc(word, unicode.IsLower) {
148+
continue
149+
}
150+
151+
if word[0] == '$' {
152+
word = word[1:]
153+
tokenPos++
154+
155+
// Reject probable immediate values (e.g. "$123").
156+
if !strings.ContainsFunc(word, isNonDigit) {
157+
continue
158+
}
159+
}
160+
161+
// Reject probably registers (e.g. "PC").
162+
if len(word) <= 3 && !strings.ContainsFunc(word, unicode.IsLower) {
163+
continue
164+
}
165+
166+
// Probable identifier reference.
167+
//
168+
// TODO(adonovan): handle FP symbols correctly;
169+
// sym+8(FP) is essentially a comment about
170+
// stack slot 8, not a reference to a symbol
171+
// with a declaration somewhere; so they form
172+
// an equivalence class without a canonical
173+
// declaration.
174+
//
175+
// TODO(adonovan): handle pseudoregisters and field
176+
// references such as:
177+
// MOVD $runtime·g0(SB), g // pseudoreg
178+
// MOVD R0, g_stackguard0(g) // field ref
179+
180+
sym := cutBefore(word, "(") // "·sym(SB)" => "sym"
181+
sym = cutBefore(sym, "+") // "sym+8(FP)" => "sym"
182+
sym = cutBefore(sym, "<") // "sym<ABIInternal>" =>> "sym"
183+
if isIdent(sym) {
184+
idents = append(idents, Ident{
185+
Name: cleanup(sym),
186+
Kind: Ref,
187+
Offset: offset + tokenPos,
188+
})
189+
}
190+
}
191+
}
192+
193+
_ = scan.Err() // ignore scan errors
194+
195+
return &File{Idents: idents}
196+
}
197+
198+
// isIdent reports whether s is a valid Go assembly identifier.
199+
func isIdent(s string) bool {
200+
for i, r := range s {
201+
if !isIdentRune(r, i) {
202+
return false
203+
}
204+
}
205+
return len(s) > 0
206+
}
207+
208+
// cutBefore returns the prefix of s that precedes the first
// occurrence of sep, or all of s if sep does not occur in it.
func cutBefore(s, sep string) string {
	idx := strings.Index(s, sep)
	if idx < 0 {
		return s
	}
	return s[:idx]
}
215+
216+
// cleanup translates a symbol name from the assembler's spelling to
// the linker's, using the substitutions defined by repl.
func cleanup(sym string) string {
	linkName := repl.Replace(sym)
	return linkName
}

// repl rewrites the assembler's stand-in characters as the ASCII
// punctuation used in linker symbol names.
var repl = strings.NewReplacer(
	"·", ".", // (U+00B7 MIDDLE DOT)
	"∕", "/", // (U+2215 DIVISION SLASH)
)
225+
226+
// isNonDigit reports whether r is anything other than a Unicode
// decimal digit.
func isNonDigit(r rune) bool {
	if unicode.IsDigit(r) {
		return false
	}
	return true
}
227+
228+
// -- plundered from GOROOT/src/cmd/asm/internal/asm/parse.go --
229+
230+
// isIdentRune reports whether ch may appear at byte offset i of an
// assembly identifier. Letters, underscore, center dot (·), and
// division slash (∕) are accepted anywhere; digits are accepted
// only after the first character.
func isIdentRune(ch rune, i int) bool {
	switch {
	case unicode.IsLetter(ch):
		return true
	case ch == '_': // Underscore; traditional.
		return true
	case ch == '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
		return true
	case ch == '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
		return true
	case unicode.IsDigit(ch):
		return i > 0
	}
	return false
}

0 commit comments

Comments
 (0)