Skip to content

Commit 1872415

Browse files
simonmar and facebook-github-bot
authored and committed
New Haskell Indexer (#511)
Summary: Redesigned the schema and rewrote the indexer. Compared with the previous indexer:
* this captures a lot more xrefs (e.g. local variables)
* it has more information (distinguishes functions/classes/constructors etc.)
* it is much simpler and probably faster, because it doesn't go via hiedb; it reads .hie files directly.
* it is probably more correct; I fixed a lot of things.

The schema is carefully designed so that a Name uniquely identifies an entity and corresponds fairly closely to GHC's Name, including OccName. The main difference is that we don't store Uniques; instead we distinguish local Names by including their ByteSpan.

There are a couple of snapshot tests, one for the plain indexer and one for the codemarkup layer, and a Glass regression test.

Not done yet:
* we can extract types from the .hie file too, and provide type hovers in Glass. That wouldn't be too hard.
* extracting more structure so that we can reconstruct data/class decls should be possible, but it's not straightforward using .hie. I'm still thinking about how best to do that.

Pull Request resolved: #511
Reviewed By: rubmary
Differential Revision: D74400980
Pulled By: jjuliamolin
fbshipit-source-id: 6cb183b96ef1c7030c8b7278434f84a4d72ceb28
1 parent affb09d commit 1872415

File tree

127 files changed

+14791
-4736
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

127 files changed

+14791
-4736
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,9 @@ jobs:
171171
- name: Build glean-clang
172172
run: make glean-clang
173173

174-
- if: matrix.ghc != '8.6.5' && matrix.ghc != '9.4.7'
175-
name: Build hiedb-indexer
176-
run: make glean-hiedb
174+
- if: matrix.ghc != '8.6.5' && matrix.ghc != '8.8.4' && matrix.ghc != '8.10.7'
175+
name: Build hie-indexer
176+
run: make glean-hie
177177

178178
- name: Run tests
179179
run: make test

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -315,9 +315,9 @@ glass::
315315
glean-clang:: gen-schema glean glean.cabal cxx-libraries glean/schema/cpp/schema.h
316316
$(CABAL) build glean-clang
317317

318-
.PHONY: glean-hiedb
319-
glean-hiedb:: glean.cabal cxx-libraries
320-
$(CABAL) build hiedb-indexer
318+
.PHONY: glean-hie
319+
glean-hie:: glean.cabal cxx-libraries
320+
$(CABAL) build hie-indexer
321321

322322
define bash_macros
323323
call_cabal() {

glean.cabal.in

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ common fb-cpp
6565
common exe
6666
ghc-options: -threaded -rtsopts
6767

68+
common haskell-indexer
69+
if impl(ghc >= 9.2)
70+
buildable: True
71+
else
72+
buildable: False
73+
6874
flag clang
6975
default: False
7076

@@ -1098,40 +1104,26 @@ executable hack-derive
10981104
glean:stubs,
10991105
glean:util,
11001106

1101-
-- Haskell indexer via hiedb
1102-
executable hiedb-indexer
1103-
import: deps, fb-haskell, exe
1104-
if impl(ghc >= 8.8 && < 9.4)
1105-
buildable: True
1106-
else
1107-
buildable: False
1107+
-- Haskell indexer via hie
1108+
executable hie-indexer
1109+
import: deps, fb-haskell, exe, haskell-indexer
11081110
hs-source-dirs: glean/lang/haskell
1109-
main-is: HieDBIndexer/Main.hs
1111+
main-is: HieIndexer/Main.hs
11101112
other-modules:
1111-
HieDBIndexer.Builder
1112-
HieDBIndexer.DefaultMain
1113-
HieDBIndexer.Glean
1114-
HieDBIndexer.HieDB
1115-
HieDBIndexer.Options
1116-
HieDBIndexer.Trace
1117-
HieDBIndexer.Types
1118-
ghc-options: -main-is HieDBIndexer.Main
1113+
HieIndexer.Index
1114+
HieIndexer.Options
1115+
ghc-options: -main-is HieIndexer.Main
11191116
build-depends:
11201117
ghc,
11211118
glean:client-hs,
11221119
glean:client-hs-local,
11231120
glean:core,
1124-
glean:db,
1125-
glean:if-glean-hs,
11261121
glean:lib,
1127-
glean:lib-derive,
11281122
glean:schema,
11291123
glean:stubs,
11301124
glean:util,
11311125
hie-compat < 0.3.1.2,
1132-
hiedb < 0.4.3,
1133-
split,
1134-
sqlite-simple,
1126+
hiedb < 0.4.3
11351127

11361128
-- -----------------------------------------------------------------------------
11371129
-- LSIF support
@@ -2153,8 +2145,34 @@ test-suite glean-snapshot-hack
21532145
if !flag(hack-tests)
21542146
buildable: False
21552147

2148+
test-suite glean-snapshot-haskell
2149+
import: fb-haskell, fb-cpp, deps, exe, haskell-indexer
2150+
hs-source-dirs: glean/lang/haskell/tests
2151+
type: exitcode-stdio-1.0
2152+
main-is: Main.hs
2153+
ghc-options: -main-is Main
2154+
build-depends:
2155+
glean:regression-test-lib,
2156+
glean:indexers
2157+
build-tool-depends:
2158+
glean:hie-indexer,
2159+
glean:glean
2160+
2161+
test-suite glean-snapshot-codemarkup-haskell
2162+
import: fb-haskell, fb-cpp, deps, exe, haskell-indexer
2163+
hs-source-dirs: glean/lang/codemarkup/tests/haskell
2164+
type: exitcode-stdio-1.0
2165+
main-is: Main.hs
2166+
ghc-options: -main-is Main
2167+
build-depends:
2168+
glean:regression-test-lib,
2169+
glean:indexers
2170+
build-tool-depends:
2171+
glean:hie-indexer,
2172+
glean:glean
2173+
21562174
test-suite glean-snapshot-rust-lsif
2157-
import: fb-haskell, deps
2175+
import: fb-haskell, deps, exe
21582176
hs-source-dirs: glean/lang/rust-lsif/tests
21592177
type: exitcode-stdio-1.0
21602178
main-is: Glean/Regression/RustLsif/Main.hs
@@ -2342,6 +2360,19 @@ test-suite glass-regression-hack
23422360
if !flag(hack-tests)
23432361
buildable: False
23442362

2363+
test-suite glass-regression-haskell
2364+
import: glass-regression-deps, fb-haskell, deps, exe, haskell-indexer
2365+
type: exitcode-stdio-1.0
2366+
main-is: Glean/Glass/Regression/Haskell/Main.hs
2367+
ghc-options: -main-is Glean.Glass.Regression.Haskell.Main
2368+
other-modules: Glean.Glass.Regression.Haskell
2369+
build-depends:
2370+
glean:client-hs,
2371+
glean:indexers,
2372+
glean:util
2373+
build-tool-depends:
2374+
glean:hie-indexer
2375+
23452376
test-suite glass-regression-typescript
23462377
import: glass-regression-deps, fb-haskell, deps, exe
23472378
type: exitcode-stdio-1.0

glean/glass/Glean/Glass/RepoMapping.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ gleanIndices_ = Map.fromList
4545
, (RepoName "test",
4646
[("test", Language_JavaScript)
4747
,("test", Language_Hack)
48+
,("test", Language_Haskell)
4849
,("test", Language_Cpp)
4950
,("test", Language_PreProcessor)
5051
,("test", Language_Python)

glean/glass/Glean/Glass/Search/Haskell.hs

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,73 @@ module Glean.Glass.Search.Haskell
1515

1616
import Data.Text ( Text )
1717
import qualified Data.Text as Text ( intercalate )
18+
import Util.Text
1819

1920
import Glean.Angle as Angle
2021

2122
import Glean.Glass.Search.Class
22-
import Glean.Glass.Query ( entityLocation )
2323

2424
import qualified Glean.Schema.CodeHs.Types as Haskell
25-
import qualified Glean.Schema.CodemarkupTypes.Types as Code
26-
import qualified Glean.Schema.SearchHs.Types as Haskell
27-
import qualified Glean.Schema.Src.Types as Src
25+
import qualified Glean.Schema.Hs.Types as Hs
2826

2927
instance Search (ResultLocation Haskell.Entity) where
3028
symbolSearch [] = return $ None "Haskell.symbolSearch: empty"
31-
symbolSearch toks = do
32-
searchSymbolId toks $ searchByName $ Text.intercalate "." toks
33-
34-
-- code.hs:searchByName
35-
searchByName :: Text -> Angle (ResultLocation Haskell.Entity)
36-
searchByName sym =
37-
vars $ \(ent :: Angle Haskell.Entity) (file :: Angle Src.File)
38-
(rangespan :: Angle Code.RangeSpan) (lname :: Angle Text) ->
39-
tuple (ent, file, rangespan, lname) `where_` [
40-
wild .= predicate @Haskell.SearchByName (
41-
rec $
42-
field @"name" (string sym) $
43-
field @"entity" ent
44-
end),
45-
entityLocation (alt @"hs" ent) file rangespan lname
46-
]
29+
-- Decode a symbol id whose tokens are
--
-- >  PKG : MODULE-COMPONENTS... : NAMESPACE : IDENT [: START : LENGTH]
--
-- and look the entity up with 'symbolIdQuery'. The tokens after the package
-- are examined from the right: if the last two parse as integers they are
-- taken as the span of a local name, otherwise the name is looked up as an
-- external one. The second number is handed to 'symbolIdQuery', which
-- matches it against the @length@ field of the internal sort, so it is
-- named @len@ here rather than @end@.
--
-- Anything that does not fit either shape is reported as a malformed symbol
-- (the token list is non-empty at this point, so "empty" would be
-- misleading).
symbolSearch toks@(pkg : rest) =
  case reverse rest of
    -- Local name: trailing numeric span.
    len : start : ident : namespace : revMod
      | Right l <- textToInt len
      , Right s <- textToInt start
      , Just ns <- fromNamespace namespace ->
          searchSymbolId toks $
            symbolIdQuery pkg (moduleName revMod) ident ns (Just (s, l))
    -- External name: no span.
    ident : namespace : revMod
      | Just ns <- fromNamespace namespace ->
          searchSymbolId toks $
            symbolIdQuery pkg (moduleName revMod) ident ns Nothing
    _ -> return $ None "Haskell.symbolSearch: malformed symbol"
  where
    -- Module components arrive reversed; restore the dotted module name.
    moduleName = Text.intercalate "." . reverse

    -- Namespace tags as they appear in symbol ids.
    fromNamespace "var" = Just Hs.Namespace_var_
    fromNamespace "ty" = Just Hs.Namespace_tycon
    fromNamespace "con" = Just Hs.Namespace_datacon
    fromNamespace "tyvar" = Just Hs.Namespace_tyvar
    fromNamespace _ = Nothing
50+
51+
52+
-- | Build the Angle query that resolves the components of a decoded symbol
-- id to a result tuple of (entity, file, span, identifier text).
--
-- The query binds an @Hs.Name@ fact by occurrence name, namespace, module
-- name and sort, and then requires an @Hs.DeclarationLocation@ fact for that
-- name, from which the file and span in the result are taken.
-- The package argument is accepted but not used in the query.
symbolIdQuery
53+
:: Text -- ^ package (ignored (TODO))
54+
-> Text -- ^ module name
55+
-> Text -- ^ identifier
56+
-> Hs.Namespace -- ^ namespace (var, datacon, tycon, tyvar)
57+
-> Maybe (Int, Int) -- ^ (start, length) span, for local names
58+
-> Angle (ResultLocation Haskell.Entity)
59+
symbolIdQuery _pkg mod ident ns sort =
60+
vars $ \name file span ->
61+
tuple (
62+
alt @"name" (asPredicate name),
63+
file,
64+
alt @"span" span,
65+
string ident
66+
) `where_` [
67+
name .= predicate @Hs.Name (
68+
rec $
69+
field @"occ" (rec $
70+
field @"name" (string ident) $
71+
field @"namespace_" (enum ns) end) $
72+
field @"mod" (rec $
73+
field @"name" (string mod) end) $
74+
field @"sort" (
75+
case sort of
76+
-- no span given: match any name whose sort is the "external" alternative
Nothing -> alt @"external" wild
77+
-- span given: match the "internal" alternative with exactly this start/length
Just (s,l) -> alt @"internal" (rec $
78+
field @"start" (nat (fromIntegral s)) $
79+
field @"length" (nat (fromIntegral l)) end)
80+
) end),
81+
-- the declaration location supplies the file and span returned in the tuple
stmt $ predicate @Hs.DeclarationLocation (
82+
rec $
83+
field @"name" (asPredicate name) $
84+
field @"file" (asPredicate file) $
85+
field @"span" span end
86+
)
87+
]

glean/glass/Glean/Glass/SymbolId/Hs.hs

Lines changed: 53 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,70 +10,74 @@
1010

1111
module Glean.Glass.SymbolId.Hs ({- instances -}) where
1212

13-
import Data.Text (Text)
13+
import Data.Char
1414
import qualified Data.Text as Text
15+
import TextShow
1516

1617
import Glean.Glass.SymbolId.Class
1718
import Glean.Glass.Types (Name(..))
18-
import Glean.Schema.CodeHs.Types as Hs (Entity (..))
19+
import Glean.Schema.CodeHs.Types as Hs (Entity(..))
1920
import qualified Glean
2021
import qualified Glean.Schema.Hs.Types as Hs
2122
import qualified Glean.Schema.Src.Types as Src
2223

24+
-- REPO/hs/containers/Data/Map/{var|con|tyvar|ty}/toList[/START/LENGTH]
25+
2326
instance Symbol Hs.Entity where
24-
toSymbol (Hs.Entity_definition d) = toSymbolPredicate d
25-
toSymbol (Hs.Entity_function_ d) = toSymbolPredicate d
26-
toSymbol (Hs.Entity_class_ d) = toSymbolPredicate d
27-
toSymbol Hs.Entity_EMPTY = return []
27+
toSymbol (Hs.Entity_name x) = toSymbolPredicate x
28+
toSymbol (Hs.Entity_mod x) = toSymbolPredicate x
29+
toSymbol _ = error "toSymbol: unknown Hs.Entity"
2830

29-
instance Symbol Hs.Definition_key where
30-
toSymbol (Hs.Definition_key name _) = do
31-
n <- Glean.keyOf name
32-
return (Text.splitOn "." n)
31+
-- | The symbol path of a name is the path of its module, followed by the
-- path of its occurrence name, followed by the path of its sort.
instance Symbol Hs.Name_key where
  toSymbol (Hs.Name_key occ mod sort) = do
    modulePath <- toSymbol mod
    occPath <- toSymbol occ
    sortPath <- toSymbol sort
    return (mconcat [modulePath, occPath, sortPath])
3337

34-
instance Symbol Hs.FunctionDefinition_key where
35-
toSymbol (Hs.FunctionDefinition_key fnName Src.Range{..}) = do
36-
name <- Glean.keyOf fnName
37-
fname <- Glean.keyOf range_file
38-
return (fname : Text.splitOn "." name)
38+
-- | Module facts are encoded via 'toSymbolPredicate'.
-- NOTE(review): presumably this resolves the fact's key and defers to the
-- 'Symbol' instance for @Hs.Module_key@ — confirm against
-- "Glean.Glass.SymbolId.Class".
instance Symbol Hs.Module where
39+
toSymbol = toSymbolPredicate
3940

40-
instance Symbol Hs.Class_key where
41-
toSymbol (Hs.Class_key clsName Src.Range{..}) = do
42-
name <- Glean.keyOf clsName
43-
fname <- Glean.keyOf range_file
44-
return (fname : Text.splitOn "." name)
41+
-- | Occurrence-name facts are encoded via 'toSymbolPredicate'.
-- NOTE(review): presumably this resolves the fact's key and defers to the
-- 'Symbol' instance for @Hs.OccName_key@ — confirm against
-- "Glean.Glass.SymbolId.Class".
instance Symbol Hs.OccName where
42+
toSymbol = toSymbolPredicate
4543

46-
instance ToQName Hs.Entity where
47-
toQName (Hs.Entity_definition d) = Glean.keyOf d >>= toQName
48-
toQName (Hs.Entity_function_ d) = Glean.keyOf d >>= toQName
49-
toQName (Hs.Entity_class_ d) = Glean.keyOf d >>= toQName
50-
toQName Hs.Entity_EMPTY =
51-
return $ Left "toQName: Haskell: empty qname"
52-
53-
instance ToQName Hs.Definition_key where
54-
toQName (Hs.Definition_key name _) = do
44+
instance Symbol Hs.Module_key where
45+
toSymbol (Hs.Module_key name unit) = do
46+
u <- Glean.keyOf unit
5547
n <- Glean.keyOf name
56-
return $ case reverse (Text.splitOn "." n) of
57-
[] -> Left "toQName: Haskell: empty function qname"
58-
[x] -> Right (Name x, Name "")
59-
(x:xs) -> Right (Name x, joinDotted xs)
48+
-- unit names are things like glean-0.1.0.0-inplace-core
49+
-- let's strip the version and everything after it
50+
let pkg = Text.intercalate "-" (fst (break isVer (Text.splitOn "-" u)))
51+
return (pkg : Text.splitOn "." n)
52+
where
53+
isVer t
54+
| Just (d, _) <- Text.uncons t = isDigit d
55+
| otherwise = False
6056

61-
instance ToQName Hs.FunctionDefinition_key where
62-
toQName (Hs.FunctionDefinition_key fnName _) = do
63-
name <- Glean.keyOf fnName
64-
return $ case reverse (Text.splitOn "." name) of
65-
[] -> Left "toQName: Haskell: empty function qname"
66-
[x] -> Right (Name x, Name "")
67-
(x:xs) -> Right (Name x, joinDotted xs)
57+
-- | An occurrence name contributes two path components: a short tag for its
-- namespace and then the name itself.
instance Symbol Hs.OccName_key where
  toSymbol (Hs.OccName_key name namespace) =
    return [namespaceTag namespace, name]
    where
      -- Short tags used in symbol ids; any other namespace value is an error.
      namespaceTag Hs.Namespace_var_ = "var"
      namespaceTag Hs.Namespace_datacon = "con"
      namespaceTag Hs.Namespace_tyvar = "tyvar"
      namespaceTag Hs.Namespace_tycon = "ty"
      namespaceTag _ = error "namespace"
6866

67+
-- | External names add nothing to the symbol path. Internal names add the
-- two components of their byte span, rendered as decimal text, in the order
-- they appear in @Src.ByteSpan@.
instance Symbol Hs.NameSort where
  toSymbol nameSort = case nameSort of
    Hs.NameSort_external{} -> return []
    Hs.NameSort_internal (Src.ByteSpan start len) ->
      return (map (showt . Glean.fromNat) [start, len])
    _ -> error "toSymbol: unknown Hs.NameSort"
6972

70-
instance ToQName Hs.Class_key where
71-
toQName (Hs.Class_key clsName _) = do
72-
name <- Glean.keyOf clsName
73-
return $ case reverse (Text.splitOn "." name) of
74-
[] -> Left "toQName: Haskell: empty class qname"
75-
[x] -> Right (Name x, Name "")
76-
(x:xs) -> Right (Name x, joinDotted xs)
73+
-- | Qualified name of a Haskell entity.
--
-- Name entities defer to the 'ToQName' instance of their key. Module
-- entities and unrecognised alternatives are not supported; they are
-- reported through the 'Left' side of the result rather than by throwing,
-- so a single unsupported entity cannot abort the enclosing computation.
instance ToQName Hs.Entity where
  toQName (Hs.Entity_name n) = Glean.keyOf n >>= toQName
  toQName (Hs.Entity_mod _) =
    return $ Left "toQName: Haskell: module entities are not supported yet"
  toQName _ =
    return $ Left "toQName: Haskell: unknown entity"
7777

78-
joinDotted :: [Text] -> Name
79-
joinDotted = Name . Text.intercalate "." . reverse
78+
-- | Qualified name of a Haskell name: the occurrence name is the local
-- name and the module name is its container.
--
-- The pair is returned as (local name, container), matching the order used
-- by the instances this one replaces.
instance ToQName Hs.Name_key where
  toQName (Hs.Name_key occ mod _) = do
    Hs.Module_key m _ <- Glean.keyOf mod
    modname <- Glean.keyOf m
    Hs.OccName_key n _ <- Glean.keyOf occ
    return $ Right (Name n, Name modname)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{-
2+
Copyright (c) Meta Platforms, Inc. and affiliates.
3+
All rights reserved.
4+
5+
This source code is licensed under the BSD-style license found in the
6+
LICENSE file in the root directory of this source tree.
7+
-}
8+
9+
module Glean.Glass.Regression.Haskell (main) where
10+
11+
import Glean.Indexer.Haskell as Haskell ( indexer )
12+
import Glean.Glass.Regression.Snapshot ( mainGlassSnapshot )
13+
14+
-- | Entry point of the Glass regression snapshot test for Haskell: hands
-- the test name, the test directory and the Haskell indexer to
-- 'mainGlassSnapshot', with @const []@ as the final argument.
main :: IO ()
main =
  mainGlassSnapshot
    "glass-regression-haskell"
    "glean/glass/test/regression/tests/haskell"
    Haskell.indexer
    (const [])

0 commit comments

Comments
 (0)