Skip to content

Commit c9445d4

Browse files
tobiasdiez authored and
LinusDietz committed
Fix #2775: Hyphens in last names are properly parsed (#3209)
1 parent 58fec29 commit c9445d4

File tree

3 files changed

+70
-33
lines changed

3 files changed

+70
-33
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
4040
- We fixed an issue where metadata syncing with local and shared databases was unstable. This also fixes the syncing of groups and sub-groups in the database. [#2284](https://github.com/JabRef/jabref/issues/2284)
4141
- We fixed an issue where it was possible to leave the entry editor with an imbalance of braces. [#3167](https://github.com/JabRef/jabref/issues/3167)
4242
- Renaming files now truncates the filename to not exceed the limit of 255 chars [#2622](https://github.com/JabRef/jabref/issues/2622)
43+
- We improved the handling of hyphens in names. [#2775](https://github.com/JabRef/jabref/issues/2775)
4344

4445
### Removed
4546
- We removed support for LatexEditor, as it is not under active development. [#3199](https://github.com/JabRef/jabref/issues/3199)

src/main/java/org/jabref/model/entry/AuthorListParser.java

Lines changed: 46 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,6 @@ public class AuthorListParser {
3232
// Constant HashSet containing names of TeX special characters
3333
private static final Set<String> TEX_NAMES = new HashSet<>();
3434

35-
/** the raw bibtex author/editor field */
36-
private String original;
37-
38-
/** index of the start in original, for example to point to 'abc' in 'abc xyz', tokenStart=2 */
39-
private int tokenStart;
40-
41-
/** index of the end in original, for example to point to 'abc' in 'abc xyz', tokenEnd=5 */
42-
private int tokenEnd;
43-
44-
/** end of token abbreviation (always: tokenStart < tokenAbbr <= tokenEnd), only valid if getToken returns TOKEN_WORD */
45-
private int tokenAbbr;
46-
47-
48-
/** either space of dash */
49-
private char tokenTerm;
50-
51-
/** true if upper-case token, false if lower-case */
52-
private boolean tokenCase;
53-
5435
static {
5536
TEX_NAMES.add("aa");
5637
TEX_NAMES.add("ae");
@@ -66,6 +47,32 @@ public class AuthorListParser {
6647
TEX_NAMES.add("j");
6748
}
6849

50+
/**
51+
* the raw bibtex author/editor field
52+
*/
53+
private String original;
54+
/**
55+
* index of the start in original, for example to point to 'abc' in '  abc xyz' (two leading spaces), tokenStart=2
56+
*/
57+
private int tokenStart;
58+
/**
59+
* index of the end in original, for example to point to 'abc' in '  abc xyz' (two leading spaces), tokenEnd=5
60+
*/
61+
private int tokenEnd;
62+
/**
63+
* end of token abbreviation (always: tokenStart < tokenAbbrEnd <= tokenEnd), only valid if getToken returns
64+
* TOKEN_WORD
65+
*/
66+
private int tokenAbbrEnd;
67+
/**
68+
* either space or dash
69+
*/
70+
private char tokenTerm;
71+
/**
72+
* true if upper-case token, false if lower-case
73+
*/
74+
private boolean tokenCase;
75+
6976
/**
7077
* Parses the String containing person names and returns a list of person information.
7178
*
@@ -121,7 +128,7 @@ private Optional<Author> getAuthor() {
121128
break;
122129
case TOKEN_WORD:
123130
tokens.add(original.substring(tokenStart, tokenEnd));
124-
tokens.add(original.substring(tokenStart, tokenAbbr));
131+
tokens.add(original.substring(tokenStart, tokenAbbrEnd));
125132
tokens.add(tokenTerm);
126133
tokens.add(tokenCase);
127134
if (commaFirst >= 0) {
@@ -137,6 +144,13 @@ private Optional<Author> getAuthor() {
137144
// We are in a first name which contained a hyphen
138145
break;
139146
}
147+
148+
int thisTermToken = previousTermToken + TOKEN_GROUP_LENGTH;
149+
if ((thisTermToken >= 0) && tokens.get(thisTermToken).equals('-')) {
150+
// We are in a name which contained a hyphen
151+
break;
152+
}
153+
140154
vonStart = tokens.size() - TOKEN_GROUP_LENGTH;
141155
break;
142156
}
@@ -194,14 +208,16 @@ private Optional<Author> getAuthor() {
194208
firstPartStart = 0;
195209
}
196210
}
197-
} else { // commas are present: it affects only 'first part' and
198-
// 'junior part'
211+
} else {
212+
// commas are present: it affects only 'first part' and 'junior part'
199213
firstPartEnd = tokens.size();
200-
if (commaSecond < 0) { // one comma
214+
if (commaSecond < 0) {
215+
// one comma
201216
if (commaFirst < firstPartEnd) {
202217
firstPartStart = commaFirst;
203218
}
204-
} else { // two or more commas
219+
} else {
220+
// two or more commas
205221
if (commaSecond < firstPartEnd) {
206222
firstPartStart = commaSecond;
207223
}
@@ -342,7 +358,7 @@ private int getToken() {
342358
tokenEnd++;
343359
return TOKEN_AND;
344360
}
345-
tokenAbbr = -1;
361+
tokenAbbrEnd = -1;
346362
tokenTerm = ' ';
347363
tokenCase = true;
348364
int bracesLevel = 0;
@@ -353,8 +369,9 @@ private int getToken() {
353369
if (c == '{') {
354370
bracesLevel++;
355371
}
356-
if (firstLetterIsFound && (tokenAbbr < 0) && ((bracesLevel == 0) || (c == '{'))) {
357-
tokenAbbr = tokenEnd;
372+
373+
if (firstLetterIsFound && (tokenAbbrEnd < 0) && ((bracesLevel == 0) || (c == '{'))) {
374+
tokenAbbrEnd = tokenEnd;
358375
}
359376
if ((c == '}') && (bracesLevel > 0)) {
360377
bracesLevel--;
@@ -388,8 +405,8 @@ private int getToken() {
388405
}
389406
tokenEnd++;
390407
}
391-
if (tokenAbbr < 0) {
392-
tokenAbbr = tokenEnd;
408+
if (tokenAbbrEnd < 0) {
409+
tokenAbbrEnd = tokenEnd;
393410
}
394411
if ((tokenEnd < original.length()) && (original.charAt(tokenEnd) == '-')) {
395412
tokenTerm = '-';

src/test/java/org/jabref/model/entry/AuthorListTest.java

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
public class AuthorListTest {
99

10+
public static int size(String bibtex) {
11+
return AuthorList.parse(bibtex).getNumberOfAuthors();
12+
}
13+
1014
@Test
1115
public void testFixAuthorNatbib() {
1216
Assert.assertEquals("", AuthorList.fixAuthorNatbib(""));
@@ -286,10 +290,6 @@ public void testFixAuthorForAlphabetization() {
286290
.fixAuthorForAlphabetization("John von Neumann and John Smith and de Black Brown, Jr., Peter"));
287291
}
288292

289-
public static int size(String bibtex) {
290-
return AuthorList.parse(bibtex).getNumberOfAuthors();
291-
}
292-
293293
@Test
294294
public void testSize() {
295295

@@ -625,6 +625,25 @@ public void parseNameWithHyphenInLastName() throws Exception {
625625
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("Firstname Bailey-Jones"));
626626
}
627627

628+
@Test
629+
public void parseNameWithHyphenInLastNameWithInitials() throws Exception {
630+
Author expected = new Author("E. S.", "E. S.", null, "El-{M}allah", null);
631+
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("E. S. El-{M}allah"));
632+
}
633+
634+
@Test
635+
public void parseNameWithHyphenInLastNameWithEscaped() throws Exception {
636+
Author expected = new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null);
637+
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("E. S. {K}ent-{B}oswell"));
638+
}
639+
640+
@Test
641+
public void parseNameWithHyphenInLastNameWhenLastNameGivenFirst() throws Exception {
642+
// TODO: Fix abbreviation to be "A."
643+
Author expected = new Author("ʿAbdallāh", "ʿ.", null, "al-Ṣāliḥ", null);
644+
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("al-Ṣāliḥ, ʿAbdallāh"));
645+
}
646+
628647
@Test
629648
public void parseNameWithBraces() throws Exception {
630649
Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null);

0 commit comments

Comments
 (0)