Skip to content

Commit 42e1f72

Browse files
authored
patches: Add wcslen support and move typedefs in nls (#119)
Error log:

ld.lld: error: undefined symbol: wcslen
>>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
>>> vmlinux.o:(alloc_path_with_tree_prefix)
>>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
>>> vmlinux.o:(alloc_path_with_tree_prefix)

Signed-off-by: Hai Tran <[email protected]>
1 parent 9b40592 commit 42e1f72

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
From: Nathan Chancellor <[email protected]>
2+
Subject: [PATCH v3 1/2] include: Move typedefs in nls.h to their own header
3+
Date: Fri, 28 Mar 2025 12:26:31 -0700
4+
Content-Type: text/plain; charset="utf-8"
5+
6+
In order to allow commonly included headers such as string.h to access
7+
typedefs such as wchar_t without running into issues with the rest of
8+
the NLS library, refactor the typedefs out into their own header that
9+
can be included in a much safer manner.
10+
11+
12+
Reviewed-by: Andy Shevchenko <[email protected]>
13+
Signed-off-by: Nathan Chancellor <[email protected]>
14+
---
15+
include/linux/nls.h | 19 +------------------
16+
include/linux/nls_types.h | 26 ++++++++++++++++++++++++++
17+
2 files changed, 27 insertions(+), 18 deletions(-)
18+
19+
diff --git a/include/linux/nls.h b/include/linux/nls.h
20+
index e0bf8367b274..3d416d1f60b6 100644
21+
--- a/include/linux/nls.h
22+
+++ b/include/linux/nls.h
23+
@@ -3,24 +3,7 @@
24+
#define _LINUX_NLS_H
25+
26+
#include <linux/init.h>
27+
-
28+
-/* Unicode has changed over the years. Unicode code points no longer
29+
- * fit into 16 bits; as of Unicode 5 valid code points range from 0
30+
- * to 0x10ffff (17 planes, where each plane holds 65536 code points).
31+
- *
32+
- * The original decision to represent Unicode characters as 16-bit
33+
- * wchar_t values is now outdated. But plane 0 still includes the
34+
- * most commonly used characters, so we will retain it. The newer
35+
- * 32-bit unicode_t type can be used when it is necessary to
36+
- * represent the full Unicode character set.
37+
- */
38+
-
39+
-/* Plane-0 Unicode character */
40+
-typedef u16 wchar_t;
41+
-#define MAX_WCHAR_T 0xffff
42+
-
43+
-/* Arbitrary Unicode character */
44+
-typedef u32 unicode_t;
45+
+#include <linux/nls_types.h>
46+
47+
struct nls_table {
48+
const char *charset;
49+
diff --git a/include/linux/nls_types.h b/include/linux/nls_types.h
50+
new file mode 100644
51+
index 000000000000..9479df1016da
52+
--- /dev/null
53+
+++ b/include/linux/nls_types.h
54+
@@ -0,0 +1,26 @@
55+
+/* SPDX-License-Identifier: GPL-2.0 */
56+
+#ifndef _LINUX_NLS_TYPES_H
57+
+#define _LINUX_NLS_TYPES_H
58+
+
59+
+#include <linux/types.h>
60+
+
61+
+/*
62+
+ * Unicode has changed over the years. Unicode code points no longer
63+
+ * fit into 16 bits; as of Unicode 5 valid code points range from 0
64+
+ * to 0x10ffff (17 planes, where each plane holds 65536 code points).
65+
+ *
66+
+ * The original decision to represent Unicode characters as 16-bit
67+
+ * wchar_t values is now outdated. But plane 0 still includes the
68+
+ * most commonly used characters, so we will retain it. The newer
69+
+ * 32-bit unicode_t type can be used when it is necessary to
70+
+ * represent the full Unicode character set.
71+
+ */
72+
+
73+
+/* Plane-0 Unicode character */
74+
+typedef u16 wchar_t;
75+
+#define MAX_WCHAR_T 0xffff
76+
+
77+
+/* Arbitrary Unicode character */
78+
+typedef u32 unicode_t;
79+
+
80+
+#endif /* _LINUX_NLS_TYPES_H */
81+
--
82+
2.49.0
83+
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
From: Nathan Chancellor <[email protected]>
2+
Subject: [PATCH v3 2/2] lib/string.c: Add wcslen()
3+
Date: Fri, 28 Mar 2025 12:26:32 -0700
4+
Content-Type: text/plain; charset="utf-8"
5+
6+
A recent optimization change in LLVM [1] aims to transform certain loop
7+
idioms into calls to strlen() or wcslen(). This change transforms the
8+
first while loop in UniStrcat() into a call to wcslen(), breaking the
9+
build when UniStrcat() gets inlined into alloc_path_with_tree_prefix():
10+
11+
ld.lld: error: undefined symbol: wcslen
12+
>>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
13+
>>> vmlinux.o:(alloc_path_with_tree_prefix)
14+
>>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
15+
>>> vmlinux.o:(alloc_path_with_tree_prefix)
16+
17+
The kernel does not build with '-ffreestanding' (which would avoid this
18+
transformation) because it does want libcall optimizations in general
19+
and turning on '-ffreestanding' disables the majority of them. While
20+
'-fno-builtin-wcslen' would be more targeted at the problem, it does not
21+
work with LTO.
22+
23+
Add a basic wcslen() to avoid this linkage failure. While no
24+
architecture or FORTIFY_SOURCE overrides this, add it to string.c
25+
instead of string_helpers.c so that it is built with '-ffreestanding',
26+
otherwise the compiler might transform it into a call to itself.
27+
28+
29+
Link: https://github.com/llvm/llvm-project/commit/9694844d7e36fd5e01011ab56b64f27b867aa72d [1]
30+
Signed-off-by: Nathan Chancellor <[email protected]>
31+
---
32+
include/linux/string.h | 2 ++
33+
lib/string.c | 11 +++++++++++
34+
2 files changed, 13 insertions(+)
35+
36+
diff --git a/include/linux/string.h b/include/linux/string.h
37+
index 0403a4ca4c11..b000f445a2c7 100644
38+
--- a/include/linux/string.h
39+
+++ b/include/linux/string.h
40+
@@ -10,6 +10,7 @@
41+
#include <linux/stddef.h> /* for NULL */
42+
#include <linux/err.h> /* for ERR_PTR() */
43+
#include <linux/errno.h> /* for E2BIG */
44+
+#include <linux/nls_types.h> /* for wchar_t */
45+
#include <linux/overflow.h> /* for check_mul_overflow() */
46+
#include <linux/stdarg.h>
47+
#include <uapi/linux/string.h>
48+
@@ -203,6 +204,7 @@ extern __kernel_size_t strlen(const char *);
49+
#ifndef __HAVE_ARCH_STRNLEN
50+
extern __kernel_size_t strnlen(const char *,__kernel_size_t);
51+
#endif
52+
+__kernel_size_t wcslen(const wchar_t *s);
53+
#ifndef __HAVE_ARCH_STRPBRK
54+
extern char * strpbrk(const char *,const char *);
55+
#endif
56+
diff --git a/lib/string.c b/lib/string.c
57+
index eb4486ed40d2..2c6f8c8f4159 100644
58+
--- a/lib/string.c
59+
+++ b/lib/string.c
60+
@@ -21,6 +21,7 @@
61+
#include <linux/errno.h>
62+
#include <linux/limits.h>
63+
#include <linux/linkage.h>
64+
+#include <linux/nls_types.h>
65+
#include <linux/stddef.h>
66+
#include <linux/string.h>
67+
#include <linux/types.h>
68+
@@ -429,6 +430,16 @@ size_t strnlen(const char *s, size_t count)
69+
EXPORT_SYMBOL(strnlen);
70+
#endif
71+
72+
+size_t wcslen(const wchar_t *s)
73+
+{
74+
+ const wchar_t *sc;
75+
+
76+
+ for (sc = s; *sc != '\0'; ++sc)
77+
+ /* nothing */;
78+
+ return sc - s;
79+
+}
80+
+EXPORT_SYMBOL(wcslen);
81+
+
82+
#ifndef __HAVE_ARCH_STRSPN
83+
/**
84+
* strspn - Calculate the length of the initial substring of @s which only contain letters in @accept
85+
--
86+
2.49.0
87+

0 commit comments

Comments
 (0)