Skip to content

Commit baae94c

Browse files
chris-campos and copybara-github
authored and committed
Add option for ignoring whitespace
Sometimes, in the config generator, the files we are comparing against have been formatted to meet certain style guidelines. In these cases, ignoring whitespace leads to more accurate results for the glob matching and core.move transform heuristics. This change adds a flag so the behavior can easily be enabled or disabled.

BUG=409065048
PiperOrigin-RevId: 744862693
Change-Id: I8493ed0e86e073ea1c602c4f3ff734db846b8da7
1 parent 0ed6995 commit baae94c

File tree

8 files changed

+160
-33
lines changed

8 files changed

+160
-33
lines changed

java/com/google/copybara/configgen/ConfigGenHeuristics.java

+14-4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ public class ConfigGenHeuristics {
6767
private final ImmutableSet<Path> destinationOnlyPaths;
6868
private final int percentSimilar;
6969
private final boolean ignoreCarriageReturn;
70+
private final boolean ignoreWhitespace;
7071

7172
/**
7273
* Creates the Generator object
@@ -84,12 +85,14 @@ public ConfigGenHeuristics(
8485
Path destination,
8586
ImmutableSet<Path> destinationOnlyPaths,
8687
int percentSimilar,
87-
boolean ignoreCarriageReturn) {
88+
boolean ignoreCarriageReturn,
89+
boolean ignoreWhitespace) {
8890
this.origin = checkNotNull(origin);
8991
this.destination = checkNotNull(destination);
9092
this.destinationOnlyPaths = checkNotNull(destinationOnlyPaths);
9193
this.percentSimilar = percentSimilar;
9294
this.ignoreCarriageReturn = ignoreCarriageReturn;
95+
this.ignoreWhitespace = ignoreWhitespace;
9396
}
9497

9598
/** Result of the config generation */
@@ -134,7 +137,12 @@ public ConfigGenHeuristics.Result run() throws IOException {
134137
ImmutableSet<Path> g3Files = listFiles(destination);
135138
SimilarityDetector similarityDetector =
136139
SimilarityDetector.create(
137-
origin, gitFiles, destinationOnlyPaths, percentSimilar, ignoreCarriageReturn);
140+
origin,
141+
gitFiles,
142+
destinationOnlyPaths,
143+
percentSimilar,
144+
ignoreCarriageReturn,
145+
ignoreWhitespace);
138146
// Map of destination file paths to origin file paths with similarity score.
139147
Map<Path, PathAndScore> destinationToOriginMapping = new TreeMap<>();
140148

@@ -499,9 +507,11 @@ private static SimilarityDetector create(
499507
ImmutableSet<Path> files,
500508
ImmutableSet<Path> destinationOnlyPaths,
501509
int percentSimilar,
502-
boolean ignoreCarriageReturn)
510+
boolean ignoreCarriageReturn,
511+
boolean ignoreWhitespace)
503512
throws IOException {
504-
RenameDetector<Path> similarLines = new RenameDetector<>(ignoreCarriageReturn);
513+
RenameDetector<Path> similarLines =
514+
new RenameDetector<>(ignoreCarriageReturn, ignoreWhitespace);
505515
HashMultimap<String, Path> hashes = HashMultimap.create(files.size(), 1);
506516
for (Path file : files) {
507517
byte[] bytes = Files.readAllBytes(parent.resolve(file));

java/com/google/copybara/onboard/ConfigHeuristicsInputProvider.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,21 @@ public class ConfigHeuristicsInputProvider implements InputProvider {
5959

6060
private final GitOptions gitOptions;
6161
private final GeneralOptions generalOptions;
62+
private final GeneratorOptions generatorOptions;
6263
private final ImmutableSet<Path> destinationOnlyPaths;
6364
private final int percentSimilar;
6465
private final Console console;
6566

6667
public ConfigHeuristicsInputProvider(
6768
GitOptions gitOptions,
6869
GeneralOptions generalOptions,
70+
GeneratorOptions generatorOptions,
6971
ImmutableSet<Path> destinationOnlyPaths,
7072
int percentSimilar,
7173
Console console) {
7274
this.gitOptions = gitOptions;
7375
this.generalOptions = generalOptions;
76+
this.generatorOptions = generatorOptions;
7477
this.destinationOnlyPaths = destinationOnlyPaths;
7578
this.percentSimilar = percentSimilar;
7679
this.console = console;
@@ -140,7 +143,8 @@ private Optional<Result> computeHeuristic(
140143
destination,
141144
destinationOnlyPaths,
142145
percentSimilar,
143-
generalOptions.isTemporaryFeature("GENERATOR_IGNORE_CARRIAGE_RETURN", true));
146+
generatorOptions.computeGlobIgnoreCarriageReturn,
147+
generatorOptions.computeGlobIgnoreWhitespace);
144148

145149
console.progressFmt("Computing globs");
146150
cached = Optional.of(heuristics.run());

java/com/google/copybara/onboard/GeneratorCmd.java

+1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ public ImmutableList<InputProvider> getInputProviders(CommandEnv commandEnv)
9898
new ConfigHeuristicsInputProvider(
9999
commandEnv.getOptions().get(GitOptions.class),
100100
commandEnv.getOptions().get(GeneralOptions.class),
101+
commandEnv.getOptions().get(GeneratorOptions.class),
101102
ImmutableSet.of(),
102103
PERCENTAGE_SIMILAR,
103104
console),

java/com/google/copybara/onboard/GeneratorOptions.java

+15
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,19 @@ public class GeneratorOptions implements Option {
5252
names = "--new-package",
5353
description = "Whether or not files from this package exist in the destination.")
5454
public boolean newPackage;
55+
56+
@Parameter(
57+
names = "--compute-glob-ignore-carriage-return",
58+
description =
59+
"Whether to ignore carriage return characters in file content comparisons during glob"
60+
+ " generation.",
61+
arity = 1)
62+
public boolean computeGlobIgnoreCarriageReturn = true;
63+
64+
@Parameter(
65+
names = "--compute-glob-ignore-whitespace",
66+
description =
67+
"Whether to ignore whitespace in file content comparisons during glob generation.",
68+
arity = 1)
69+
public boolean computeGlobIgnoreWhitespace = true;
5570
}

java/com/google/copybara/util/RenameDetector.java

+11-3
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,13 @@
3737
*/
3838
public final class RenameDetector<I> {
3939
private final boolean ignoreCarriageReturn;
40+
private final boolean ignoreWhitespace;
4041

4142
private final List<PriorFile<I>> priorFiles = new ArrayList<>();
4243

43-
public RenameDetector(boolean ignoreCarriageReturn) {
44+
public RenameDetector(boolean ignoreCarriageReturn, boolean ignoreWhitespace) {
4445
this.ignoreCarriageReturn = ignoreCarriageReturn;
46+
this.ignoreWhitespace = ignoreWhitespace;
4547
}
4648

4749
private static final class PriorFile<I> {
@@ -63,10 +65,12 @@ private static final class HashingByteProcessor implements ByteProcessor<int[]>
6365

6466
int hash;
6567
final boolean ignoreCarriageReturn;
68+
final boolean ignoreWhitespace;
6669
final HashSet<Integer> hashes = new HashSet<>();
6770

68-
HashingByteProcessor(boolean ignoreCarriageReturn) {
71+
HashingByteProcessor(boolean ignoreCarriageReturn, boolean ignoreWhitespace) {
6972
this.ignoreCarriageReturn = ignoreCarriageReturn;
73+
this.ignoreWhitespace = ignoreWhitespace;
7074
}
7175

7276
@Override
@@ -77,6 +81,9 @@ public boolean processBytes(byte[] buf, int off, int len) {
7781
// Skip carriage return in Windows-style line endings when hashing.
7882
continue;
7983
}
84+
if (ignoreWhitespace && (b == ' ' || b == '\t')) {
85+
continue;
86+
}
8087

8188
hash *= 31;
8289
hash += b;
@@ -102,7 +109,8 @@ public int[] getResult() {
102109
*/
103110
private int[] hashes(InputStream input) throws IOException {
104111
try {
105-
return ByteStreams.readBytes(input, new HashingByteProcessor(ignoreCarriageReturn));
112+
return ByteStreams.readBytes(
113+
input, new HashingByteProcessor(ignoreCarriageReturn, ignoreWhitespace));
106114
} finally {
107115
input.close();
108116
}

javatests/com/google/copybara/configgen/ConfigGenHeuristicsTest.java

+16-4
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,18 @@ public void testRename() throws IOException {
7373
public void testIgnoreCarriageReturn() throws IOException {
7474
writeFile(origin, "fileA", "aaa\n".repeat(20));
7575
writeFile(destination, "fileB", "aaa\r\n".repeat(20) + "CHANGED");
76-
ConfigGenHeuristics.Result result = createHeuristics(true).run();
76+
ConfigGenHeuristics.Result result =
77+
createHeuristics(/* ignoreCarriageReturn= */ true, /* ignoreWhitespace= */ false).run();
78+
79+
globMatches(result.getOriginGlob(), "a/b/c/fileA");
80+
}
81+
82+
@Test
83+
public void testIgnoreWhitespace() throws IOException {
84+
writeFile(origin, "fileA", "aaa \n".repeat(20));
85+
writeFile(destination, "fileB", "aaa \n".repeat(20) + "CHANGED");
86+
ConfigGenHeuristics.Result result =
87+
createHeuristics(/* ignoreCarriageReturn= */ false, /* ignoreWhitespace= */ true).run();
7788

7889
globMatches(result.getOriginGlob(), "a/b/c/fileA");
7990
}
@@ -251,11 +262,12 @@ private static void writeFile(Path basePath, String relativePath, String content
251262
}
252263

253264
private ConfigGenHeuristics createHeuristics() {
254-
return createHeuristics(false);
265+
return createHeuristics(/* ignoreCarriageReturn= */ false, /* ignoreWhitespace= */ false);
255266
}
256267

257-
private ConfigGenHeuristics createHeuristics(boolean ignoreCarriageReturn) {
268+
private ConfigGenHeuristics createHeuristics(
269+
boolean ignoreCarriageReturn, boolean ignoreWhitespace) {
258270
return new ConfigGenHeuristics(
259-
origin, destination, destinationOnlyPaths, 30, ignoreCarriageReturn);
271+
origin, destination, destinationOnlyPaths, 30, ignoreCarriageReturn, ignoreWhitespace);
260272
}
261273
}

javatests/com/google/copybara/onboard/ConfigHeuristicsInputProviderTest.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,19 @@ public class ConfigHeuristicsInputProviderTest {
6161
protected TestingConsole console;
6262
protected OptionsBuilder optionsBuilder;
6363
protected GeneralOptions generalOptions;
64+
protected GeneratorOptions generatorOptions;
6465

6566
@Before
6667
public void setup() throws Exception {
6768
console = new TestingConsole();
6869
repoGitDir = Files.createTempDirectory("GitDestinationTest-repoGitDir");
6970
workDir = Files.createTempDirectory("workdir");
7071
destination = Files.createTempDirectory("destination");
71-
7272
optionsBuilder = getOptionsBuilder(console);
7373
String unused = git("init", "--bare", repoGitDir.toString());
7474
generalOptions = optionsBuilder.general;
7575
gitOptions = optionsBuilder.git;
76+
generatorOptions = optionsBuilder.generator;
7677
origin =
7778
GitRepository.newBareRepo(
7879
repoGitDir,
@@ -118,7 +119,7 @@ public <T> T resolve(Input<T> input) throws CannotProvideException {
118119

119120
ConfigHeuristicsInputProvider inputProvider =
120121
new ConfigHeuristicsInputProvider(
121-
gitOptions, generalOptions, ImmutableSet.of(), 30, console);
122+
gitOptions, generalOptions, generatorOptions, ImmutableSet.of(), 30, console);
122123
Optional<Glob> glob = inputProvider.resolve(Inputs.ORIGIN_GLOB, resolver);
123124

124125
// The result is an empty glob rather than glob(include = ["**"], exclude = ["**"])
@@ -155,7 +156,7 @@ public <T> T resolve(Input<T> input) throws CannotProvideException {
155156
};
156157
ConfigHeuristicsInputProvider inputProvider =
157158
new ConfigHeuristicsInputProvider(
158-
gitOptions, generalOptions, ImmutableSet.of(), 30, console);
159+
gitOptions, generalOptions, generatorOptions, ImmutableSet.of(), 30, console);
159160

160161
DestinationExcludePaths paths =
161162
inputProvider.resolve(Inputs.DESTINATION_EXCLUDE_PATHS, resolver).get();
@@ -201,7 +202,7 @@ public <T> T resolve(Input<T> input) throws CannotProvideException {
201202

202203
ConfigHeuristicsInputProvider inputProvider =
203204
new ConfigHeuristicsInputProvider(
204-
gitOptions, generalOptions, ImmutableSet.of(), 30, console);
205+
gitOptions, generalOptions, generatorOptions, ImmutableSet.of(), 30, console);
205206
Glob expectedGlob = Glob.createGlob(ImmutableList.of("**"), ImmutableList.of("bar.txt"));
206207
Optional<Glob> glob = inputProvider.resolve(Inputs.ORIGIN_GLOB, resolver);
207208

0 commit comments

Comments
 (0)