Skip to content

Commit 5b67a25

Browse files
committed
fix: Fix for #141: regexes starting and ending with .*
1 parent 23057de commit 5b67a25

File tree

3 files changed

+84
-35
lines changed

3 files changed

+84
-35
lines changed

src/main/java/com/endava/cats/generator/simple/RegexFlattener.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,8 @@ private static String removeStartEndAnyChar(String regex) {
4242
return regex;
4343
}
4444

45-
// Remove leading unescaped '.*' or '*' patterns
4645
regex = regex.replaceAll("^(?:(?<!\\\\)\\*+\\.*|\\.*(?<!\\\\)\\*+)", "");
47-
48-
// Remove trailing unescaped '.*' or '*' patterns
49-
regex = regex.replaceAll("(?:(?<!\\\\)\\*+\\.*|\\.*(?<!\\\\)\\*+)$", "\\\\w*");
46+
regex = regex.replaceAll("(?<!\\\\)\\.\\*$", "");
5047

5148
return regex;
5249
}

src/main/java/com/endava/cats/generator/simple/StringGenerator.java

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.Locale;
2121
import java.util.Optional;
2222
import java.util.function.Function;
23+
import java.util.function.Supplier;
2324
import java.util.regex.Matcher;
2425
import java.util.regex.Pattern;
2526
import java.util.regex.PatternSyntaxException;
@@ -186,35 +187,64 @@ public static String generate(String pattern, int min, int max) {
186187
String cleanedPattern = cleanPattern(pattern);
187188
String flattenedPattern = RegexFlattener.flattenRegex(cleanedPattern);
188189

190+
List<Supplier<Optional<String>>> attempts = List.of(
191+
() -> generateString(pattern, min, max, cleanedPattern, flattenedPattern),
192+
() -> generateString(cleanedPattern, min, max, cleanedPattern, flattenedPattern),
193+
() -> generateString(cleanedPattern, min, max, cleanedPattern, cleanedPattern),
194+
() -> generateString(flattenedPattern, min, max, cleanedPattern, flattenedPattern),
195+
() -> generateString(flattenedPattern, min, max, flattenedPattern, flattenedPattern)
196+
);
197+
198+
return attempts.stream()
199+
.flatMap(attempt -> attempt.get().stream())
200+
.findFirst()
201+
.orElseThrow(() -> new IllegalArgumentException(
202+
String.format("Could not generate a string for pattern %s with min %d and max %d", pattern, min, max)
203+
));
204+
}
205+
206+
private static Optional<String> generateString(String pattern, int min, int max, String cleanedPattern, String flattenedPattern) {
189207
String valueBasedOnSimpleRegexes = tryGenerateWithSimpleRegexes(pattern, min, max, cleanedPattern);
190208

191209
if (valueBasedOnSimpleRegexes != null) {
192-
return valueBasedOnSimpleRegexes;
210+
return Optional.of(valueBasedOnSimpleRegexes);
193211
}
194212

195213
GeneratorParams generatorParams = new GeneratorParams(flattenedPattern, min, max, cleanedPattern);
196214

197-
String generatedWithRgxGenerator = callGenerateTwice(StringGenerator::generateUsingRgxGenerator, generatorParams);
198-
if (generatedWithRgxGenerator != null) {
199-
return generatedWithRgxGenerator;
215+
Optional<String> generatedWithInitialMinMax = callGeneratorsInOrder(generatorParams);
216+
if (generatedWithInitialMinMax.isPresent()) {
217+
return generatedWithInitialMinMax;
200218
}
201219

202220
if (min == -1 && max == -1) {
203-
GeneratorParams generatorParamsWithMinMax = new GeneratorParams(flattenedPattern, 0, 300, cleanedPattern);
204-
return generateUsingRegexpGen(generatorParamsWithMinMax);
221+
GeneratorParams generatorParamsWithMinMax = new GeneratorParams(flattenedPattern, 1, 300, cleanedPattern);
222+
223+
Optional<String> generatedWithAdjustedMinMax = callGeneratorsInOrder(generatorParamsWithMinMax);
224+
if (generatedWithAdjustedMinMax.isPresent()) {
225+
return generatedWithAdjustedMinMax;
226+
}
205227
}
206228

207-
String generatedUsingCatsRegexGenerator = callGenerateTwice(StringGenerator::generateUsingCatsRegexGenerator, generatorParams);
208-
if (generatedUsingCatsRegexGenerator != null) {
209-
return generatedUsingCatsRegexGenerator;
229+
return Optional.empty();
230+
}
231+
232+
private static Optional<String> callGeneratorsInOrder(GeneratorParams generatorParams) {
233+
String rgxGeneratedWithMinMax = callGenerateTwice(StringGenerator::generateUsingRgxGenerator, generatorParams);
234+
if (rgxGeneratedWithMinMax != null) {
235+
return Optional.of(rgxGeneratedWithMinMax);
210236
}
211237

212-
String generateUsingRegexpGen = callGenerateTwice(StringGenerator::generateUsingRegexpGen, generatorParams);
213-
if (generateUsingRegexpGen != null) {
214-
return generateUsingRegexpGen;
238+
String generatedWithCatsRegexGenerator = callGenerateTwice(StringGenerator::generateUsingCatsRegexGenerator, generatorParams);
239+
if (generatedWithCatsRegexGenerator != null) {
240+
return Optional.of(generatedWithCatsRegexGenerator);
215241
}
216242

217-
throw new IllegalArgumentException("Could not generate a string for pattern " + pattern + " with min " + min + " and max " + max);
243+
String generatedWithRegexpGen = callGenerateTwice(StringGenerator::generateUsingRegexpGen, generatorParams);
244+
if (generatedWithRegexpGen != null) {
245+
return Optional.of(generatedWithRegexpGen);
246+
}
247+
return Optional.empty();
218248
}
219249

220250
public static String tryGenerateWithSimpleRegexes(String originalPattern, int min, int max, String cleanedPattern) {
@@ -239,7 +269,10 @@ public static String callGenerateTwice(Function<GeneratorParams, String> generat
239269
LOGGER.debug("Generator {} failed #atempt 1", generator.getClass().getSimpleName());
240270
}
241271
try {
242-
String secondVersion = generator.apply(new GeneratorParams(removeLookaheadAssertions(generatorParams.cleanedPattern()), generatorParams.min, generatorParams.max, generatorParams.originalPattern()));
272+
String patternWithLookaheadsRemoved = removeLookaheadAssertions(generatorParams.cleanedPattern());
273+
LOGGER.debug("Pattern with lookaheads removed {}", patternWithLookaheadsRemoved);
274+
275+
String secondVersion = generator.apply(new GeneratorParams(patternWithLookaheadsRemoved, generatorParams.min, generatorParams.max, generatorParams.originalPattern()));
243276
if (secondVersion.matches(generatorParams.originalPattern())) {
244277
LOGGER.debug("Generated value with lookaheads removed " + secondVersion + " matched " + generatorParams.originalPattern());
245278
return secondVersion;
@@ -604,7 +637,7 @@ public static List<String> getUnsupportedMediaTypes() {
604637
*/
605638
public static String removeLookaheadAssertions(String regex) {
606639
regex = regex.replaceAll("\\(\\?=([^)]*)\\)", "($1)");
607-
regex = regex.replaceAll("\\(\\?!([^)]*)\\)", "(^$1)");
640+
regex = regex.replaceAll("\\(\\?!([^)]*)\\)", "");
608641
regex = regex.replaceAll("\\(\\?<=([^)]*)\\)", "($1)");
609642
regex = regex.replaceAll("\\(\\?<!([^)]*)\\)", "(^$1)");
610643

src/test/java/com/endava/cats/generator/simple/StringGeneratorTest.java

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ void shouldGenerateComplexEmailRegex() {
215215
"^([0-9]{1,3}\\.){3}[0-9]{1,3}(:[0-9]{1,5})?$;^(\\d{1,3}\\.){3}\\d{1,3}(:\\d{1,5})?$",
216216
"^([0-9]{4}-[0-9]{2}-[0-9]{2}[\\s\\t\\r\\n\\f]{0,1}[0-9]{2}:[0-9]{2}:[0-9]{2})\\s\\[([^\\]]+)\\]\\s(.*)$;^(\\d{4}-\\d{2}-\\d{2}\\s?\\d{2}:\\d{2}:\\d{2})\\s\\[([^\\]]+)\\]\\s(.*)$",
217217
"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|6(?:011|5[0-9]{2})[0-9]{12})$;^(?:4\\d{12}(?:\\d{3})?|5[1-5]\\d{14}|3[47]\\d{13}|3(?:0[0-5]|[68]\\d)\\d{11}|6(?:011|5\\d{2})\\d{12})$",
218-
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*;(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$)\\w*", ".*;.*", "^(?!\\s*$).+;^(?!\\s*$).+",
218+
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*;(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$)", ".*;.*", "^(?!\\s*$).+;^(?!\\s*$).+",
219219
"^[^\\u0000-\\u00FF]+$;^[\\u0100-\\uFFFF]+$"},
220220
delimiter = ';')
221221
void shouldFlatten(String regex, String expected) {
@@ -256,22 +256,41 @@ void shouldGenerateWithInnerDollarSign() {
256256

257257
@ParameterizedTest
258258
@CsvSource(value = {
259-
"'(^$)|^(((\\+|00)(9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)([[:space:]]?))?([\\d]{4}\\d{1,9})$)'; 1; 6000",
260-
"'^(?!\\s*$).+'; 1; 128",
261-
"'^[^\\u0000-\\u00FF]$'; -1; -1",
262-
"'arn:aws:logs:[a-z\\-0-9]*:[0-9]{12}:log-group:([\\.\\-_/#A-Za-z0-9]+):\\*$'; 47; 562",
263-
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*; 1; 6000",
264-
"[^\\r\\n]; 1; 1", "^arn:[^:]{1,63}:ec2:[^:]{0,63}:[^:]{0,63}:subnet\\/subnet-[0-9a-f]{8,17}$|^$;2;100",
265-
"(?=^.{8,64}$)((?=.*\\d)(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[^A-Za-z0-9\\s])(?=.*[a-z])|(?=.*[^A-Za-z0-9\\s])(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[A-Z])(?=.*[^A-Za-z0-9\\s]))^.*; 1; 256",
266-
"^urn:tdm:(([a-z]{2}-(gov-)?[a-z]{4,9}-[0-9]{1,3}/[0-9]+/)*[\\p{Alnum}_]+(/[\\p{Alnum}_]+)*):([\\p{Alpha}]*):([\\p{Alnum}_]+(/[\\p{Alnum}_]+)*)$;1;200",
267-
"^$|/.*;1;1024", "https://[a-zA-Z0-9-.]*\\.amazon(aws)?\\.com[/]?;1;1024"
259+
"'(^$)|^(((\\+|00)(9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)([[:space:]]?))?([\\d]{4}\\d{1,9})$)'; 1; 6000; 1; 6000",
260+
"'(^$)|^(((\\+|00)(9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)([[:space:]]?))?([\\d]{4}\\d{1,9})$)'; -1; -1; 1; 6000",
261+
"'^(?!\\s*$).+'; 1; 128; 1; 128",
262+
"'^(?!\\s*$).+'; -1; -1; 1; 128",
263+
"'arn:aws:logs:[a-z\\-0-9]*:[0-9]{12}:log-group:([\\.\\-_/#A-Za-z0-9]+):\\*$'; 47; 562; 47; 562",
264+
"'arn:aws:logs:[a-z\\-0-9]*:[0-9]{12}:log-group:([\\.\\-_/#A-Za-z0-9]+):\\*$'; -1; -1; 1; 562",
265+
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*; 1; 6000; 1; 6000",
266+
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*; -1; -1; 1; 6000",
267+
"[^\\r\\n]; 1; 1; 1; 1", "^arn:[^:]{1,63}:ec2:[^:]{0,63}:[^:]{0,63}:subnet\\/subnet-[0-9a-f]{8,17}$|^$; 2; 100; 2; 100",
268+
"[^\\r\\n]; -1; -1; 1; 1", "^arn:[^:]{1,63}:ec2:[^:]{0,63}:[^:]{0,63}:subnet\\/subnet-[0-9a-f]{8,17}$|^$; -1; -1; 2; 300",
269+
"(?=^.{8,64}$)((?=.*\\d)(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[^A-Za-z0-9\\s])(?=.*[a-z])|(?=.*[^A-Za-z0-9\\s])(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[A-Z])(?=.*[^A-Za-z0-9\\s]))^.*; 1; 512; 1; 512",
270+
"(?=^.{8,64}$)((?=.*\\d)(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[^A-Za-z0-9\\s])(?=.*[a-z])|(?=.*[^A-Za-z0-9\\s])(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[A-Z])(?=.*[^A-Za-z0-9\\s]))^.*; -1; -1; 1; 512",
271+
"^urn:tdm:(([a-z]{2}-(gov-)?[a-z]{4,9}-[0-9]{1,3}/[0-9]+/)*[\\p{Alnum}_]+(/[\\p{Alnum}_]+)*):([\\p{Alpha}]*):([\\p{Alnum}_]+(/[\\p{Alnum}_]+)*)$; 1; 200; 1; 200",
272+
"^urn:tdm:(([a-z]{2}-(gov-)?[a-z]{4,9}-[0-9]{1,3}/[0-9]+/)*[\\p{Alnum}_]+(/[\\p{Alnum}_]+)*):([\\p{Alpha}]*):([\\p{Alnum}_]+(/[\\p{Alnum}_]+)*)$; -1; -1; 1; 200",
273+
"^$|/.*; 1; 1024; 1; 1024",
274+
"^$|/.*; -1; -1; 1; 1024",
275+
"https://[a-zA-Z0-9-.]*\\.amazon(aws)?\\.com[/]?; 1; 1024; 1; 1024",
276+
"https://[a-zA-Z0-9-.]*\\.amazon(aws)?\\.com[/]?; -1; -1; 1; 1024",
277+
"^\\s*<(.|\\n)*SignalProcessingNotification(.|\\n)*>\\s*$; 1; 256; 1; 256",
278+
"^\\s*<(.|\\n)*SignalProcessingNotification(.|\\n)*>\\s*$; -1; -1; 1; 300",
279+
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$; 1; 20; 1; 20",
280+
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$; -1; -1; 1; 20",
281+
".*(^TVA([1-9][0-9]?|1[0-9]{2})$).*; 1; 10; 1; 10",
282+
".*(^TVA([1-9][0-9]?|1[0-9]{2})$).*; -1; -1; 1; 10"
268283
}, delimiter = ';')
269-
void shouldGenerateRegex(String pattern, int minSize, int maxSize) {
284+
void shouldGenerateRegex(String pattern, int minSize, int maxSize, int expectedMinSize, int expectedMaxSize) {
270285
String generated = StringGenerator.generate(pattern, minSize, maxSize);
271-
if (minSize == -1 && maxSize == -1) {
272-
Assertions.assertThat(generated).hasSize(1).matches(pattern);
273-
} else {
274-
Assertions.assertThat(generated).hasSizeBetween(minSize, maxSize).matches(pattern);
275-
}
286+
287+
Assertions.assertThat(generated).hasSizeBetween(expectedMinSize, expectedMaxSize).matches(pattern);
288+
}
289+
290+
@Test
291+
void shouldHaveSizeOne() {
292+
String pattern = "^[^\\u0000-\\u00FF]$";
293+
String generated = StringGenerator.generate(pattern, -1, -1);
294+
Assertions.assertThat(generated).hasSize(1).matches(pattern);
276295
}
277296
}

0 commit comments

Comments
 (0)