|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Linq; |
| 4 | +using System.Text; |
| 5 | +using NUnit.Framework; |
| 6 | +using NUnit.Framework.Legacy; |
| 7 | +using Flow.Launcher.Infrastructure; |
| 8 | +using ToolGood.Words.Pinyin; |
| 9 | + |
| 10 | +namespace Flow.Launcher.Test |
| 11 | +{ |
/// <summary>
/// Compares the local ContainsChinese() helper with WordsHelper.HasChinese().
///
/// This fixture verifies:
/// 1. Each method returns the expected result for a set of known inputs (correctness)
/// 2. Both methods agree on every string in the shared sample set
/// 3. The relative timing of both implementations (printed to the console, never asserted)
///
/// ContainsChinese() walks a ReadOnlySpan of UTF-16 code units and checks them against CJK
/// Unicode ranges, decoding surrogate pairs so that supplementary-plane ideographs are seen,
/// while WordsHelper.HasChinese() uses the ToolGood.Words library implementation.
/// </summary>
[TestFixture]
public class ChineseDetectionPerformanceTest
{
    // Number of untimed passes over the sample set executed before any stopwatch is started.
    private const int WarmUpRounds = 1000;

    private readonly List<string> _testStrings = new()
    {
        // Pure English - should return false
        "Hello World",
        "Visual Studio Code",
        "Microsoft Office 2023",
        "Adobe Photoshop Creative Suite",
        "Google Chrome Browser Application",

        // Pure Chinese - should return true
        "你好世界",
        "微软办公软件",
        "谷歌浏览器",
        "北京大学计算机科学与技术学院",
        "中华人民共和国国家发展和改革委员会",

        // Mixed content - should return true
        "Hello 世界",
        "Visual Studio 代码编辑器",
        "QQ音乐 Music Player",
        "Windows 10 操作系统",
        "GitHub 代码仓库管理平台",

        // Edge cases
        "",
        " ",
        "123456",
        "!@#$%^&*()",
        "café résumé naïve", // Accented characters (not Chinese)

        // Long strings for performance testing
        "This is a very long English string that contains no Chinese characters but is designed to test performance with longer text content that might appear in file names or application descriptions",
        "这是一个非常长的中文字符串,包含了很多汉字,用来测试在处理较长中文文本时的性能表现,比如可能出现在文件名或应用程序描述中的文本内容",
        "This is a mixed 混合内容的字符串 that contains both English and Chinese characters 中英文混合 to test performance with 复杂的文本内容 in real-world scenarios 真实场景中的应用"
    };

    /// <summary>
    /// Checks ContainsChinese() against inputs whose expected result is known, including
    /// supplementary-plane ideographs that UTF-16 stores as surrogate pairs.
    /// </summary>
    [Test]
    public void ContainsChinese_CorrectnessTest()
    {
        // Verify ContainsChinese works correctly for known cases
        ClassicAssert.IsFalse(ContainsChinese("Hello World"), "Pure English should return false");
        ClassicAssert.IsTrue(ContainsChinese("你好世界"), "Pure Chinese should return true");
        ClassicAssert.IsTrue(ContainsChinese("Hello 世界"), "Mixed content should return true");
        ClassicAssert.IsFalse(ContainsChinese(""), "Empty string should return false");
        ClassicAssert.IsFalse(ContainsChinese("123456"), "Numbers should return false");
        ClassicAssert.IsFalse(ContainsChinese("café résumé"), "Accented characters should return false");

        // Code points above U+FFFF occupy two chars (a surrogate pair) in a .NET string.
        ClassicAssert.IsTrue(ContainsChinese("\U00020000"), "CJK Extension B character should return true");
        ClassicAssert.IsTrue(ContainsChinese("abc \U0002F800"), "CJK Compatibility Supplement character should return true");
        ClassicAssert.IsFalse(ContainsChinese("\U0001F600"), "Non-CJK surrogate pair (emoji) should return false");
        ClassicAssert.IsFalse(ContainsChinese("\uD840"), "Unpaired surrogate should return false");
    }

    /// <summary>
    /// Checks WordsHelper.HasChinese() against the same basic inputs used for ContainsChinese().
    /// </summary>
    [Test]
    public void WordsHelper_CorrectnessTest()
    {
        // Verify WordsHelper.HasChinese works correctly for known cases
        ClassicAssert.IsFalse(WordsHelper.HasChinese("Hello World"), "Pure English should return false");
        ClassicAssert.IsTrue(WordsHelper.HasChinese("你好世界"), "Pure Chinese should return true");
        ClassicAssert.IsTrue(WordsHelper.HasChinese("Hello 世界"), "Mixed content should return true");
        ClassicAssert.IsFalse(WordsHelper.HasChinese(""), "Empty string should return false");
        ClassicAssert.IsFalse(WordsHelper.HasChinese("123456"), "Numbers should return false");
        ClassicAssert.IsFalse(WordsHelper.HasChinese("café résumé"), "Accented characters should return false");
    }

    /// <summary>
    /// Asserts that both implementations return the same answer for every sample string.
    /// </summary>
    [Test]
    public void BothMethods_ShouldProduceSameResults()
    {
        // Critical test: verify both methods produce identical results for all test cases
        foreach (var testString in _testStrings)
        {
            var wordsHelperResult = WordsHelper.HasChinese(testString);
            var containsChineseResult = ContainsChinese(testString);

            ClassicAssert.AreEqual(wordsHelperResult, containsChineseResult,
                $"Results differ for string: '{testString}'. WordsHelper: {wordsHelperResult}, ContainsChinese: {containsChineseResult}");
        }

        Console.WriteLine($"✓ Both methods produce identical results for all {_testStrings.Count} test cases");
    }

    /// <summary>
    /// Times both implementations over the whole sample set and prints the comparison.
    /// This is a measurement only: nothing about the timings is asserted.
    /// </summary>
    [Test]
    public void PerformanceComparison_BasicBenchmark()
    {
        const int iterations = 1000000;

        // Widen before multiplying so a larger iteration count cannot overflow int.
        var totalOperations = (long)iterations * _testStrings.Count;

        Console.WriteLine("=== CHINESE CHARACTER DETECTION PERFORMANCE TEST ===");
        Console.WriteLine($"Test iterations: {iterations:N0}");
        Console.WriteLine($"Test strings: {_testStrings.Count}");
        Console.WriteLine($"Total operations: {totalOperations:N0}");
        Console.WriteLine();

        // Warmup to ensure JIT compilation
        Console.WriteLine("Warming up...");
        WarmUp(WarmUpRounds);

        // Benchmark ContainsChinese method
        ForceFullCollection();

        var sw1 = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < iterations; i++)
        {
            foreach (var testString in _testStrings)
            {
                _ = ContainsChinese(testString);
            }
        }
        sw1.Stop();

        // Benchmark WordsHelper.HasChinese method
        ForceFullCollection();

        var sw2 = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < iterations; i++)
        {
            foreach (var testString in _testStrings)
            {
                _ = WordsHelper.HasChinese(testString);
            }
        }
        sw2.Stop();

        // Calculate and display results
        var containsChineseMs = sw1.Elapsed.TotalMilliseconds;
        var wordsHelperMs = sw2.Elapsed.TotalMilliseconds;
        var speedRatio = wordsHelperMs / containsChineseMs;
        var timeDifference = wordsHelperMs - containsChineseMs;

        Console.WriteLine("RESULTS:");
        Console.WriteLine($"ContainsChinese(): {containsChineseMs:F3} ms");
        Console.WriteLine($"WordsHelper.HasChinese(): {wordsHelperMs:F3} ms");
        Console.WriteLine($"Time difference: {timeDifference:F3} ms");
        Console.WriteLine($"Speed improvement: {speedRatio:F2}x");
        Console.WriteLine($"Performance gain: {((speedRatio - 1) * 100):F1}%");
        Console.WriteLine();

        if (speedRatio > 1.0)
        {
            Console.WriteLine($"✓ ContainsChinese() is {speedRatio:F2}x faster than WordsHelper.HasChinese()");
        }
        else
        {
            Console.WriteLine($"⚠ WordsHelper.HasChinese() is {(1 / speedRatio):F2}x faster than ContainsChinese()");
        }

        // Test always passes - this is a measurement test
        ClassicAssert.IsTrue(true);
    }

    /// <summary>
    /// Times both implementations separately for each category of sample string
    /// (pure English, pure Chinese, mixed, edge cases) and prints the ratio per category.
    /// This is a measurement only: nothing about the timings is asserted.
    /// </summary>
    [Test]
    public void PerformanceComparison_ByStringType()
    {
        Console.WriteLine("=== PERFORMANCE BY STRING TYPE ===");

        var categories = new Dictionary<string, List<string>>
        {
            ["Pure English"] = _testStrings.Where(s => !ContainsChinese(s) && s.All(c => c <= 127)).ToList(),
            ["Pure Chinese"] = _testStrings.Where(s => ContainsChinese(s) && s.All(c => IsChineseCharacter(c) || char.IsWhiteSpace(c))).ToList(),
            ["Mixed Content"] = _testStrings.Where(s => ContainsChinese(s) && s.Any(c => c <= 127 && char.IsLetter(c))).ToList(),
            ["Edge Cases"] = _testStrings.Where(s => string.IsNullOrWhiteSpace(s) || s.All(c => !char.IsLetter(c))).ToList()
        };

        // Without a warm-up the first timed category would also pay for JIT compilation
        // and any first-use initialisation inside either method.
        WarmUp(WarmUpRounds);

        foreach (var category in categories)
        {
            if (category.Value.Count == 0)
            {
                continue;
            }

            Console.WriteLine($"\n{category.Key} ({category.Value.Count} strings):");

            var sample = category.Value.First();
            var displayText = sample.Length > 40 ? sample.Substring(0, 40) + "..." : sample;
            Console.WriteLine($" Sample: '{displayText}'");

            const int categoryIterations = 5000;

            // Test each method
            var sw1 = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < categoryIterations; i++)
            {
                foreach (var str in category.Value)
                {
                    _ = ContainsChinese(str);
                }
            }
            sw1.Stop();

            var sw2 = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < categoryIterations; i++)
            {
                foreach (var str in category.Value)
                {
                    _ = WordsHelper.HasChinese(str);
                }
            }
            sw2.Stop();

            // A zero reading would turn the ratio into Infinity or NaN; report it plainly instead.
            if (sw1.ElapsedTicks == 0 || sw2.ElapsedTicks == 0)
            {
                Console.WriteLine(" Performance: too fast to measure reliably");
                continue;
            }

            var ratio = (double)sw2.ElapsedTicks / sw1.ElapsedTicks;
            if (ratio >= 1.0)
            {
                Console.WriteLine($" Performance: ContainsChinese is {ratio:F2}x faster");
            }
            else
            {
                // Name the method that actually won rather than always crediting ContainsChinese.
                Console.WriteLine($" Performance: WordsHelper.HasChinese is {(1 / ratio):F2}x faster");
            }
        }

        // Test always passes - this is a measurement test
        ClassicAssert.IsTrue(true);
    }

    /// <summary>
    /// Runs both detection methods over every sample string without timing them.
    /// </summary>
    /// <param name="rounds">Number of passes over the sample set.</param>
    private void WarmUp(int rounds)
    {
        for (int i = 0; i < rounds; i++)
        {
            foreach (var testString in _testStrings)
            {
                _ = ContainsChinese(testString);
                _ = WordsHelper.HasChinese(testString);
            }
        }
    }

    /// <summary>
    /// Requests a garbage collection, waits for pending finalizers, then collects again,
    /// so a timed section is less likely to start with collectable garbage outstanding.
    /// </summary>
    private static void ForceFullCollection()
    {
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
    }

    /// <summary>
    /// Chinese character detection over UTF-16 text using CJK Unicode ranges.
    /// BMP characters are tested directly; a valid high/low surrogate pair is combined into
    /// its code point and tested against the supplementary-plane CJK ranges.
    /// </summary>
    /// <param name="text">The UTF-16 text to scan.</param>
    /// <returns>
    /// true as soon as one character in a covered CJK range is found; false for empty input,
    /// for text without such characters, and for unpaired surrogates.
    /// </returns>
    private static bool ContainsChinese(ReadOnlySpan<char> text)
    {
        for (int i = 0; i < text.Length; i++)
        {
            var c = text[i];

            if (IsChineseCharacter(c))
            {
                return true;
            }

            // A char is a single 16-bit code unit, so anything above U+FFFF arrives as two chars.
            if (char.IsHighSurrogate(c) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
            {
                if (IsSupplementaryChineseCodePoint(char.ConvertToUtf32(c, text[i + 1])))
                {
                    return true;
                }

                // Skip the low surrogate that was just consumed as part of the pair.
                i++;
            }
        }

        return false;
    }

    /// <summary>
    /// Checks whether a single UTF-16 code unit is a Chinese character in the Basic
    /// Multilingual Plane. A char cannot hold a value above U+FFFF, so the supplementary-plane
    /// blocks are handled by <see cref="IsSupplementaryChineseCodePoint"/> instead.
    /// </summary>
    /// <param name="c">The UTF-16 code unit to classify.</param>
    /// <returns>true when <paramref name="c"/> lies in one of the BMP CJK ranges below.</returns>
    private static bool IsChineseCharacter(char c)
    {
        return c is >= '\u4E00' and <= '\u9FFF'    // CJK Unified Ideographs (most common Chinese characters)
            || c is >= '\u3400' and <= '\u4DBF'    // CJK Extension A
            || c is >= '\uF900' and <= '\uFAFF';   // CJK Compatibility Ideographs
    }

    /// <summary>
    /// Checks whether a code point above U+FFFF falls in one of the supplementary-plane CJK blocks.
    /// </summary>
    /// <param name="codePoint">A Unicode code point decoded from a surrogate pair.</param>
    /// <returns>true when <paramref name="codePoint"/> lies in one of the ranges below.</returns>
    private static bool IsSupplementaryChineseCodePoint(int codePoint)
    {
        return codePoint is >= 0x20000 and <= 0x2A6DF    // CJK Extension B
            || codePoint is >= 0x2A700 and <= 0x2B73F    // CJK Extension C
            || codePoint is >= 0x2B740 and <= 0x2B81F    // CJK Extension D
            || codePoint is >= 0x2B820 and <= 0x2CEAF    // CJK Extension E
            || codePoint is >= 0x2CEB0 and <= 0x2EBEF    // CJK Extension F
            || codePoint is >= 0x2F800 and <= 0x2FA1F;   // CJK Compatibility Supplement
    }
}
| 265 | +} |
0 commit comments