Skip to content

Commit 17daf6a

Browse files
Change syntax for suffix file-types configurations (#4414)
The change in d801a66 to search for suffixes in `file-types` is too permissive: files like the tutor or `*.txt` files are now mistakenly interpreted as R or Perl, respectively. This commit changes the syntax for specifying a `file-types` entry that matches by suffix: `file-types = [{ suffix = ".git/config" }]`. It also changes file-type detection to first search for any non-suffix patterns and then search for suffixes only among the `file-types` entries explicitly marked as suffixes.
1 parent 131d839 commit 17daf6a

File tree

3 files changed

+119
-16
lines changed

3 files changed

+119
-16
lines changed

book/src/languages.md

+27-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ These configuration keys are available:
5050
| `name` | The name of the language |
5151
| `scope` | A string like `source.js` that identifies the language. Currently, we strive to match the scope names used by popular TextMate grammars and by the Linguist library. Usually `source.<name>` or `text.<name>` in case of markup languages |
5252
| `injection-regex` | regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential [language injection][treesitter-language-injection] site. |
53-
| `file-types` | The filetypes of the language, for example `["yml", "yaml"]`. This attempts to match by exact file name (`.zshrc`), then by file extension (`toml`), then by path suffix (`.git/config`). |
53+
| `file-types` | The filetypes of the language, for example `["yml", "yaml"]`. See the file-type detection section below. |
5454
| `shebangs` | The interpreters from the shebang line, for example `["sh", "bash"]` |
5555
| `roots` | A set of marker files to look for when trying to find the workspace root. For example `Cargo.lock`, `yarn.lock` |
5656
| `auto-format` | Whether to autoformat this language when saving |
@@ -63,6 +63,32 @@ These configuration keys are available:
6363
| `formatter` | The formatter for the language, it will take precedence over the lsp when defined. The formatter must be able to take the original file as input from stdin and write the formatted file to stdout |
6464
| `max-line-length` | Maximum line length. Used for the `:reflow` command |
6565

66+
### File-type detection and the `file-types` key
67+
68+
Helix uses the `file-types` key from the table above to decide which language
69+
configuration applies to a file. `file-types` is a list of strings or tables, for
70+
example:
71+
72+
```toml
73+
file-types = ["Makefile", "toml", { suffix = ".git/config" }]
74+
```
75+
76+
When determining a language configuration to use, Helix searches the file-types
77+
with the following priorities:
78+
79+
1. Exact match: if the filename of a file is an exact match of a string in a
80+
`file-types` list, that language wins. In the example above, `"Makefile"`
81+
will match against `Makefile` files.
82+
2. Extension: if there are no exact matches, any `file-types` string that
83+
matches the file extension of a given file wins. In the example above,
84+
`"toml"` matches files like `Cargo.toml` or `languages.toml`.
85+
3. Suffix: if there are still no matches, any values in `suffix` tables
86+
are checked against the full path of the given file. In the example above,
87+
the `{ suffix = ".git/config" }` would match against any `config` files
88+
in `.git` directories. Note: `/` is used as the directory separator but is
89+
replaced at runtime with the appropriate path separator for the operating
90+
system, so this rule would match against `.git\config` files on Windows.
91+
6692
### Language Server configuration
6793

6894
The `language-server` field takes the following keys:

helix-core/src/syntax.rs

+90-12
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ impl Default for Configuration {
7373
pub struct LanguageConfiguration {
7474
#[serde(rename = "name")]
7575
pub language_id: String, // c-sharp, rust
76-
pub scope: String, // source.rust
77-
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
76+
pub scope: String, // source.rust
77+
pub file_types: Vec<FileType>, // filename extension or ends_with? <Gemfile, rb, etc>
7878
#[serde(default)]
7979
pub shebangs: Vec<String>, // interpreter(s) associated with language
80-
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
80+
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
8181
pub comment_token: Option<String>,
8282
pub max_line_length: Option<usize>,
8383

@@ -125,6 +125,78 @@ pub struct LanguageConfiguration {
125125
pub rulers: Option<Vec<u16>>, // if set, override editor's rulers
126126
}
127127

128+
/// One entry of a language's `file-types` list: either a plain string or a
/// `{ suffix = "..." }` table.
#[derive(Debug, PartialEq, Eq, Hash)]
129+
pub enum FileType {
130+
/// A plain string entry. During detection it is looked up first by the
131+
/// file's full name and then by its `Path::extension`.
132+
Extension(String),
133+
/// A `{ suffix = "..." }` entry. It is compared against the end of the
134+
/// file's whole path string, so it can match on directory names as well.
135+
Suffix(String),
136+
}
137+
138+
/// Writes a `FileType` back in the shape the `file-types` list is read from:
/// a bare string for `Extension`, a one-entry `suffix` map for `Suffix`.
impl Serialize for FileType {
139+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
140+
where
141+
S: serde::Serializer,
142+
{
143+
use serde::ser::SerializeMap;
144+
145+
match self {
146+
// Plain entries are emitted as a bare string.
FileType::Extension(extension) => serializer.serialize_str(extension),
147+
FileType::Suffix(suffix) => {
148+
// Exactly one entry is written, hence the length hint of 1.
let mut map = serializer.serialize_map(Some(1))?;
149+
// Turn the platform separator back into `/`, mirroring the
// `/` -> `MAIN_SEPARATOR` replacement done when deserializing.
map.serialize_entry("suffix", &suffix.replace(std::path::MAIN_SEPARATOR, "/"))?;
150+
map.end()
151+
}
152+
}
153+
}
154+
}
155+
156+
/// Reads a `file-types` entry: a string becomes `FileType::Extension`, a
/// table whose first key is `suffix` becomes `FileType::Suffix`.
impl<'de> Deserialize<'de> for FileType {
157+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
158+
where
159+
D: serde::de::Deserializer<'de>,
160+
{
161+
// Visitor accepting the two supported shapes (string or table).
struct FileTypeVisitor;
162+
163+
impl<'de> serde::de::Visitor<'de> for FileTypeVisitor {
164+
type Value = FileType;
165+
166+
// Description of the accepted input shapes.
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
167+
formatter.write_str("string or table")
168+
}
169+
170+
// A bare string is stored verbatim as an `Extension` entry.
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
171+
where
172+
E: serde::de::Error,
173+
{
174+
Ok(FileType::Extension(value.to_string()))
175+
}
176+
177+
// A table must carry a `suffix` key with a string value.
// NOTE(review): only the first entry is read; any further entries in
// the table are not examined by this visitor — confirm that is intended.
fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
178+
where
179+
M: serde::de::MapAccess<'de>,
180+
{
181+
match map.next_entry::<String, String>()? {
182+
// Suffixes are written with `/`; store them with the platform's
// separator so they can be compared against native path strings.
Some((key, suffix)) if key == "suffix" => Ok(FileType::Suffix(
183+
suffix.replace('/', &std::path::MAIN_SEPARATOR.to_string()),
184+
)),
185+
// Any other first key is rejected with an error naming it.
Some((key, _value)) => Err(serde::de::Error::custom(format!(
186+
"unknown key in `file-types` list: {}",
187+
key
188+
))),
189+
// An empty table is rejected as well.
None => Err(serde::de::Error::custom(
190+
"expected a `suffix` key in the `file-types` entry",
191+
)),
192+
}
193+
}
194+
}
195+
196+
// `deserialize_any` lets the input's own type pick `visit_str` or `visit_map`.
deserializer.deserialize_any(FileTypeVisitor)
197+
}
198+
}
199+
128200
#[derive(Debug, Serialize, Deserialize)]
129201
#[serde(rename_all = "kebab-case")]
130202
pub struct LanguageServerConfiguration {
@@ -454,7 +526,8 @@ impl LanguageConfiguration {
454526
pub struct Loader {
455527
// highlight_names ?
456528
language_configs: Vec<Arc<LanguageConfiguration>>,
457-
language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
529+
language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize>
530+
language_config_ids_by_suffix: HashMap<String, usize>,
458531
language_config_ids_by_shebang: HashMap<String, usize>,
459532

460533
scopes: ArcSwap<Vec<String>>,
@@ -464,7 +537,8 @@ impl Loader {
464537
pub fn new(config: Configuration) -> Self {
465538
let mut loader = Self {
466539
language_configs: Vec::new(),
467-
language_config_ids_by_file_type: HashMap::new(),
540+
language_config_ids_by_extension: HashMap::new(),
541+
language_config_ids_by_suffix: HashMap::new(),
468542
language_config_ids_by_shebang: HashMap::new(),
469543
scopes: ArcSwap::from_pointee(Vec::new()),
470544
};
@@ -475,10 +549,14 @@ impl Loader {
475549

476550
for file_type in &config.file_types {
477551
// entry().or_insert(Vec::new).push(language_id);
478-
let file_type = file_type.replace('/', &std::path::MAIN_SEPARATOR.to_string());
479-
loader
480-
.language_config_ids_by_file_type
481-
.insert(file_type, language_id);
552+
match file_type {
553+
FileType::Extension(extension) => loader
554+
.language_config_ids_by_extension
555+
.insert(extension.clone(), language_id),
556+
FileType::Suffix(suffix) => loader
557+
.language_config_ids_by_suffix
558+
.insert(suffix.clone(), language_id),
559+
};
482560
}
483561
for shebang in &config.shebangs {
484562
loader
@@ -498,14 +576,14 @@ impl Loader {
498576
let configuration_id = path
499577
.file_name()
500578
.and_then(|n| n.to_str())
501-
.and_then(|file_name| self.language_config_ids_by_file_type.get(file_name))
579+
.and_then(|file_name| self.language_config_ids_by_extension.get(file_name))
502580
.or_else(|| {
503581
path.extension()
504582
.and_then(|extension| extension.to_str())
505-
.and_then(|extension| self.language_config_ids_by_file_type.get(extension))
583+
.and_then(|extension| self.language_config_ids_by_extension.get(extension))
506584
})
507585
.or_else(|| {
508-
self.language_config_ids_by_file_type
586+
self.language_config_ids_by_suffix
509587
.iter()
510588
.find_map(|(file_type, id)| {
511589
if path.to_str()?.ends_with(file_type) {

languages.toml

+2-3
Original file line numberDiff line numberDiff line change
@@ -1053,8 +1053,7 @@ source = { git = "https://github.com/tree-sitter/tree-sitter-regex", rev = "e1cf
10531053
name = "git-config"
10541054
scope = "source.gitconfig"
10551055
roots = []
1056-
# TODO: allow specifying file-types as a regex so we can read directory names (e.g. `.git/config`)
1057-
file-types = [".gitmodules", ".gitconfig", ".git/config", ".config/git/config"]
1056+
file-types = [".gitmodules", ".gitconfig", { suffix = ".git/config" }, { suffix = ".config/git/config" }]
10581057
injection-regex = "git-config"
10591058
comment-token = "#"
10601059
indent = { tab-width = 4, unit = "\t" }
@@ -1491,7 +1490,7 @@ source = { git = "https://github.com/bearcove/tree-sitter-meson", rev = "feea83b
14911490
[[language]]
14921491
name = "sshclientconfig"
14931492
scope = "source.sshclientconfig"
1494-
file-types = [".ssh/config", "/etc/ssh/ssh_config"]
1493+
file-types = [{ suffix = ".ssh/config" }, { suffix = "/etc/ssh/ssh_config" }]
14951494
roots = []
14961495

14971496
[[grammar]]

0 commit comments

Comments
 (0)