Skip to content

Commit 70435cc

Browse files
Fix corruption of sliding text window when trying to peek backwards. (#78774)
Co-authored-by: Fred Silberberg <[email protected]>
1 parent a15f64a commit 70435cc

File tree

5 files changed

+372
-13
lines changed

5 files changed

+372
-13
lines changed

src/Compilers/CSharp/Portable/Parser/Lexer.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -475,12 +475,7 @@ private void ScanSyntaxToken(ref TokenInfo info)
475475
// dots followed by an integer (which will be treated as a range expression).
476476
//
477477
// Move back one space to see what's before this dot and adjust accordingly.
478-
479-
this.TextWindow.Reset(atDotPosition - 1);
480-
var priorCharacterIsDot = this.TextWindow.PeekChar() is '.';
481-
this.TextWindow.Reset(atDotPosition);
482-
483-
if (priorCharacterIsDot)
478+
if (this.TextWindow.PreviousChar() is '.')
484479
{
485480
// We have two dots in a row. Treat the second dot as a dot, not the start of a number literal.
486481
TextWindow.AdvanceChar();

src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,8 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6-
using System.Collections.Concurrent;
76
using System.Diagnostics;
87
using System.Text;
9-
using Microsoft.CodeAnalysis.Collections;
10-
using Microsoft.CodeAnalysis.CSharp.Symbols;
11-
using Microsoft.CodeAnalysis.CSharp.Syntax;
128
using Microsoft.CodeAnalysis.PooledObjects;
139
using Microsoft.CodeAnalysis.Text;
1410
using Roslyn.Utilities;
@@ -374,6 +370,25 @@ public char PeekChar(int delta)
374370
return ch;
375371
}
376372

373+
public char PreviousChar()
374+
{
375+
Debug.Assert(this.Position > 0);
376+
if (_offset > 0)
377+
{
378+
// The allowed region of the window that can be read is from 0 to _characterWindowCount (which _offset
379+
// is in between). So as long as _offset is greater than 0, we can read the previous character directly
380+
// from the current chunk of characters in the window.
381+
return this.CharacterWindow[_offset - 1];
382+
}
383+
384+
// The prior character isn't in the window (trying to read the current character caused us to
385+
// read in the next chunk of text into the window, throwing out the preceding characters).
386+
// Just go back to the source text to find this character. While more expensive, this should
387+
// be rare given that most of the time we won't be calling this right after loading a new text
388+
// chunk.
389+
return this.Text[this.Position - 1];
390+
}
391+
377392
/// <summary>
378393
/// If the next characters in the window match the given string,
379394
/// then advance past those characters. Otherwise, do nothing.
@@ -480,5 +495,16 @@ internal static char GetCharsFromUtf32(uint codepoint, out char lowSurrogate)
480495
return (char)((codepoint - 0x00010000) / 0x0400 + 0xD800);
481496
}
482497
}
498+
499+
internal TestAccessor GetTestAccessor()
500+
=> new TestAccessor(this);
501+
502+
internal readonly struct TestAccessor(SlidingTextWindow window)
503+
{
504+
private readonly SlidingTextWindow _window = window;
505+
506+
internal void SetDefaultCharacterWindow()
507+
=> _window._characterWindow = new char[DefaultWindowLength];
508+
}
483509
}
484510
}

src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalTests.cs

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Collections.Generic;
99
using System.Globalization;
1010
using System.Linq;
11+
using System.Threading;
1112
using Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax;
1213
using Microsoft.CodeAnalysis.CSharp.Test.Utilities;
1314
using Microsoft.CodeAnalysis.Text;
@@ -4572,5 +4573,90 @@ disabled text 2
45724573
Assert.True(trivia.ContainsDiagnostics);
45734574
Assert.Equal((int)ErrorCode.ERR_Merge_conflict_marker_encountered, trivia.Errors().Single().Code);
45744575
}
4576+
4577+
[Fact, WorkItem("https://github.com/dotnet/roslyn/issues/78593")]
4578+
public void TestDotPrefixedNumberStartingAtStartOfSlidingTextWindow()
4579+
{
4580+
// This test depends on the line endings for the file being \r\n to ensure the right contents lines up at
4581+
// the right locations.
4582+
//
4583+
// It specifically validates what happens when we see `.0` at the start of the
4584+
// sliding text window, where the lexer tries to peek back one char to see if this
4585+
// is actually `..0` (a range expr) or `.0` (a floating point number).
4586+
var code = Resources.DotPrefixedNumberStartingAtStartOfSlidingTextWindow;
4587+
if (!code.Contains("\r\n"))
4588+
code = code.Replace("\n", "\r\n");
4589+
4590+
var sourceText = SourceText.From(code);
4591+
4592+
{
4593+
// Run a full parse, and validate the tree returned).
4594+
4595+
using var lexer = new Lexer(sourceText, CSharpParseOptions.Default);
4596+
4597+
// Ensure we have a normal window size, not some larger array that another test created and cached in
4598+
// the window pool
4599+
lexer.TextWindow.GetTestAccessor().SetDefaultCharacterWindow();
4600+
4601+
using var parser = new LanguageParser(lexer, oldTree: null, changes: null);
4602+
4603+
Microsoft.CodeAnalysis.SyntaxTreeExtensions.VerifySource(
4604+
sourceText, parser.ParseCompilationUnit().CreateRed());
4605+
}
4606+
4607+
{
4608+
// Now, replicate the same conditions that hte parser runs through by driving the a new lexer here
4609+
// directly. That ensures that we are actually validating exactly the conditions that led to the bug
4610+
// (a dot token starting a number, right at the start of the character window).
4611+
var lexer = new Lexer(sourceText, CSharpParseOptions.Default);
4612+
4613+
// Ensure we have a normal window size, not some larger array that another test created and cached in
4614+
// the window pool
4615+
lexer.TextWindow.GetTestAccessor().SetDefaultCharacterWindow();
4616+
4617+
var mode = LexerMode.Syntax;
4618+
for (var i = 0; i < 1326; i++)
4619+
lexer.Lex(ref mode);
4620+
4621+
// Lexer will read from index 0 in the arrray.
4622+
Assert.Equal(0, lexer.TextWindow.Offset);
4623+
4624+
// We have 205 real chars in the window
4625+
Assert.Equal(205, lexer.TextWindow.CharacterWindowCount);
4626+
4627+
// The lexer is at position 10199 in the file.
4628+
Assert.Equal(10199, lexer.TextWindow.Position);
4629+
4630+
/// The 205 characters represent the final part of the doc
4631+
Assert.Equal(lexer.TextWindow.Text.Length, lexer.TextWindow.Position + lexer.TextWindow.CharacterWindowCount);
4632+
4633+
// We're at the start of a token.
4634+
Assert.Equal(lexer.TextWindow.LexemeStartPosition, lexer.TextWindow.Position);
4635+
4636+
// Ensure that the lexer's window is starting with the next FP number (".03") right at
4637+
// the start of the window.
4638+
Assert.True(lexer.TextWindow.CharacterWindow is ['.', '0', '3', ',', ..], $"Start of window was '{new string(lexer.TextWindow.CharacterWindow, 0, 4)}'");
4639+
4640+
var token = lexer.Lex(ref mode);
4641+
Assert.Equal(SyntaxKind.NumericLiteralToken, token.Kind);
4642+
Assert.Equal(3, token.FullWidth);
4643+
Assert.Equal(".03", token.ToString());
4644+
4645+
// But we moved 3 characters forward.
4646+
Assert.Equal(3, lexer.TextWindow.Offset);
4647+
4648+
// We still have 205 real chars in the window
4649+
Assert.Equal(205, lexer.TextWindow.CharacterWindowCount);
4650+
4651+
// The lexer position has moved 3 characters forward as well.
4652+
Assert.Equal(10202, lexer.TextWindow.Position);
4653+
4654+
// We're at the start of a token.
4655+
Assert.Equal(lexer.TextWindow.LexemeStartPosition, lexer.TextWindow.Position);
4656+
4657+
// Character window didn't changee.
4658+
Assert.True(lexer.TextWindow.CharacterWindow is ['.', '0', '3', ',', ..], $"Start of window was '{new string(lexer.TextWindow.CharacterWindow, 0, 4)}'");
4659+
}
4660+
}
45754661
}
45764662
}

0 commit comments

Comments
 (0)