-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[libc]: Clean up unnecessary function pointers in scanf #121215
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc]: Clean up unnecessary function pointers in scanf #121215
Conversation
|
@llvm/pr-subscribers-libc Author: Vinay Deshmukh (vinay-deshmukh) ChangesResolves #115394
Also, full build doesn't yet build on macos m1 AFAIK Full diff: https://github.com/llvm/llvm-project/pull/121215.diff 3 Files Affected:
diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index ec1f5c098dc7a6..ccfcd220e19c21 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -6,13 +6,75 @@
//
//===----------------------------------------------------------------------===//
+#include "hdr/types/FILE.h"
#include "src/stdio/scanf_core/reader.h"
#include "src/__support/macros/config.h"
+#include "src/__support/File/file.h"
+
#include <stddef.h>
+
+
+
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
+namespace internal {
+
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+// The GPU build provides FILE access through the host operating system's
+// library. So here we simply use the public entrypoints like in the SYSTEM_FILE
+// interface. Entrypoints should normally not call others, this is an exception.
+// FIXME: We do not acquire any locks here, so this is not thread safe.
+LIBC_INLINE int getc(void *f) {
+ return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+ LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+
+#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+
+LIBC_INLINE int getc(void *f) {
+ unsigned char c;
+ auto result =
+ reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
+ size_t r = result.value;
+ if (result.has_error() || r != 1)
+ return '\0';
+
+ return c;
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+ reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
+}
+
+#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+
+// Since ungetc_unlocked isn't always available, we don't acquire the lock for
+// system files.
+LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
+
+LIBC_INLINE void ungetc(int c, void *f) {
+ ::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+} // namespace internal
+
+char Reader::getc() {
+ ++cur_chars_read;
+ if (rb != nullptr) {
+ char output = rb->buffer[rb->buff_cur];
+ ++(rb->buff_cur);
+ return output;
+ }
+ // This should reset the buffer if applicable.
+ return static_cast<char>(internal::getc(input_stream));
+}
+
void Reader::ungetc(char c) {
--cur_chars_read;
if (rb != nullptr && rb->buff_cur > 0) {
@@ -23,7 +85,8 @@ void Reader::ungetc(char c) {
--(rb->buff_cur);
return;
}
- stream_ungetc(static_cast<int>(c), input_stream);
+ internal::ungetc(static_cast<int>(c), input_stream);
}
+
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index f984fd93789109..e29ea6ea7d8917 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -16,9 +16,6 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-using StreamGetc = int (*)(void *);
-using StreamUngetc = void (*)(int, void *);
-
// This is intended to be either a raw string or a buffer syncronized with the
// file's internal buffer.
struct ReadBuffer {
@@ -29,38 +26,21 @@ struct ReadBuffer {
class Reader {
ReadBuffer *rb;
-
void *input_stream = nullptr;
-
- // TODO: Remove these unnecessary function pointers
- StreamGetc stream_getc = nullptr;
- StreamUngetc stream_ungetc = nullptr;
-
size_t cur_chars_read = 0;
public:
// TODO: Set buff_len with a proper constant
LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
- LIBC_INLINE Reader(void *stream, StreamGetc stream_getc_in,
- StreamUngetc stream_ungetc_in,
+ LIBC_INLINE Reader(void *stream,
ReadBuffer *stream_buffer = nullptr)
- : rb(stream_buffer), input_stream(stream), stream_getc(stream_getc_in),
- stream_ungetc(stream_ungetc_in) {}
+ : rb(stream_buffer), input_stream(stream){}
// This returns the next character from the input and advances it by one
// character. When it hits the end of the string or file it returns '\0' to
// signal to stop parsing.
- LIBC_INLINE char getc() {
- ++cur_chars_read;
- if (rb != nullptr) {
- char output = rb->buffer[rb->buff_cur];
- ++(rb->buff_cur);
- return output;
- }
- // This should reset the buffer if applicable.
- return static_cast<char>(stream_getc(input_stream));
- }
+ char getc();
// This moves the input back by one character, placing c into the buffer if
// this is a file reader, else c is ignored.
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 67126431fcded5..84d074711b8fb6 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -38,14 +38,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; }
LIBC_INLINE void funlockfile(::FILE *) { return; }
-LIBC_INLINE int getc(void *f) {
- return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
- LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
LIBC_INLINE int ferror_unlocked(::FILE *f) { return LIBC_NAMESPACE::ferror(f); }
#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
@@ -58,21 +50,6 @@ LIBC_INLINE void funlockfile(FILE *f) {
reinterpret_cast<LIBC_NAMESPACE::File *>(f)->unlock();
}
-LIBC_INLINE int getc(void *f) {
- unsigned char c;
- auto result =
- reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
- size_t r = result.value;
- if (result.has_error() || r != 1)
- return '\0';
-
- return c;
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
- reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
-}
-
LIBC_INLINE int ferror_unlocked(FILE *f) {
return reinterpret_cast<LIBC_NAMESPACE::File *>(f)->error_unlocked();
}
@@ -85,12 +62,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; }
LIBC_INLINE void funlockfile(::FILE *) { return; }
-LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
-
-LIBC_INLINE void ungetc(int c, void *f) {
- ::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
LIBC_INLINE int ferror_unlocked(::FILE *f) { return ::ferror(f); }
#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
@@ -103,7 +74,7 @@ LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
const char *__restrict format,
internal::ArgList &args) {
internal::flockfile(stream);
- scanf_core::Reader reader(stream, &internal::getc, internal::ungetc);
+ scanf_core::Reader reader(stream);
int retval = scanf_core::scanf_main(&reader, format, args);
if (retval == 0 && internal::ferror_unlocked(stream))
retval = EOF;
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
From: https://github.com/llvm/llvm-project/actions/runs/12518682283/job/34921546556?pr=121215 Error was: ``` /usr/bin/ld: libc/src/stdio/scanf_core/CMakeFiles/libc.src.stdio.scanf_core.reader.dir/./reader.cpp.o: in function `__llvm_libc_20_0_0_git::scanf_core::Reader::getc()': reader.cpp:(.text._ZN22__llvm_libc_20_0_0_git10scanf_core6Reader4getcEv+0x38): undefined reference to `__llvm_libc_20_0_0_git::File::read_unlocked(void*, unsigned long)' /usr/bin/ld: libc/src/stdio/scanf_core/CMakeFiles/libc.src.stdio.scanf_core.reader.dir/./reader.cpp.o: in function `__llvm_libc_20_0_0_git::scanf_core::Reader::ungetc(char)': reader.cpp:(.text._ZN22__llvm_libc_20_0_0_git10scanf_core6Reader6ungetcEc+0x2c): undefined reference to `__llvm_libc_20_0_0_git::File::ungetc_unlocked(int)' /usr/bin/ld: libc/test/src/stdio/libc.test.src.stdio.sscanf_test.__unit__.__build__: hidden symbol `_ZN22__llvm_libc_20_0_0_git4File13read_unlockedEPvm' isn't defined /usr/bin/ld: final link failed: bad value clang++: error: linker command failed with exit code 1 (use -v to see invocation) ```
Ubuntu error - linker error for read_unlocked ungetc_unlocked
Windows error - macro substitution issue
Macos Error - macro substitution issue
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like a good start, there's a couple things to clean up, but with the suggested changes it build on linux for me.
@@ -62,6 +62,7 @@ add_object_library( | |||
reader.h | |||
DEPENDS | |||
libc.src.__support.macros.attributes | |||
libc.hdr.types.FILE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is missing the dependency on libc.src.__support.File.file
, as well as the flag controlling if it's using the system file. You'll also need to set up proper handling for the GPU like what vfscanf_internal
has.
libc.hdr.types.FILE | |
libc.src.__support.File.file | |
libc.hdr.types.FILE | |
${use_system_file} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I seem to be getting an error like (after adding the above change):
The dependency target "libc.src.__support.File.file" of target
"libc.src.stdio.scanf_core.reader" does not exist.
is that expected on a M1 mac?
using the build commands from:#115394 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah, that's because we only support overlay mode on mac, and overlay mode doesn't have FILE
. You'll need to add logic to only add libc.src.__support.File.file
to the list of DEPENDS
if LLVM_LIBC_FULL_BUILD
is true.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
like what vfscanf_internal has.
So, I think there's a bug / typo in the CMake call there.
Instead of:
add_header_library(
vfscanf_internal
HDRS
vfscanf_internal.h
DEPENDS
.reader
.scanf_main
libc.include.stdio
libc.src.__support.File.file
libc.src.__support.arg_list
${use_system_file}
)
it should be:
add_header_library(
vfscanf_internal
HDRS
vfscanf_internal.h
DEPENDS
.reader
.scanf_main
libc.include.stdio
libc.src.__support.File.file
libc.src.__support.arg_list
COMPILE_OPTIONS
${use_system_file}
)
I think is what caused my build to have the bugs with macOS' system stdio.h macros
Link to code:
llvm-project/libc/src/stdio/scanf_core/CMakeLists.txt
Lines 119 to 121 in 106c483
libc.src.__support.arg_list | |
${use_system_file} | |
) |
I can add the COMPILE_OPTIONS
in this PR if that's oK?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The COMPILE_OPTIONS
shouldn't be necessary, because use_system_file
already contains it, see: https://github.com/llvm/llvm-project/blob/main/libc/src/stdio/CMakeLists.txt#L30
libc/src/stdio/scanf_core/reader.cpp
Outdated
char Reader::getc() { | ||
++cur_chars_read; | ||
if (rb != nullptr) { | ||
char output = rb->buffer[rb->buff_cur]; | ||
++(rb->buff_cur); | ||
return output; | ||
} | ||
// This should reset the buffer if applicable. | ||
return static_cast<char>(internal::getc(input_stream)); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this being in the header was intentional. That way the fast path (reading directly from the buffer) gets properly inlined into the readers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha, will move it back to header
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it ok to move the ungetc
implementation to the header as well?
The reason is, now that the internal::getc
is compiled based on the platform depending on which macro is defined, if we keep getc
in .h
but ungetc
in .cpp
that macro if/else chain will need to exist in two places over just one if it gets moved to .h
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving ungetc into the header should be fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to be OOO for the next week, so you will either need to wait or ask someone else to finish this review. I think it should be good to land once you do the CMake fix and address the comments, just make sure to test it before landing.
libc/src/stdio/scanf_core/reader.cpp
Outdated
char Reader::getc() { | ||
++cur_chars_read; | ||
if (rb != nullptr) { | ||
char output = rb->buffer[rb->buff_cur]; | ||
++(rb->buff_cur); | ||
return output; | ||
} | ||
// This should reset the buffer if applicable. | ||
return static_cast<char>(internal::getc(input_stream)); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving ungetc into the header should be fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one nit. Thanks for doing the work on this. Do you need me to merge it for you or do you have commit access?
No problem. Fixed the nit as well. Will need you to merge it. Thanks! |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/13359 Here is the relevant piece of the build log for the reference
|
Looks like the issue is that the GPU-specific includes didn't get moved over. I'll make a PR to fix it. |
In llvm#121215 the reader was reorganized and the definitions of the internal getc and ungetc functions were moved, but the includes that the GPU builder depends on were not. This patch moves the includes to the correct new place.
Oh yeah I remember needing to do something stupid for the GPU |
In #121215 the reader was reorganized and the definitions of the internal getc and ungetc functions were moved, but the includes that the GPU builder depends on were not. This patch moves the includes to the correct new place.
Yeah, I figure we can clean that up in a followup. |
I think it was due to the lack of an |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/12915 Here is the relevant piece of the build log for the reference
|
Ah, due to the cmake conditions, scanf isn't working for targets without FILE. Give me a sec and I'll fix this. |
Just started looking into this. Thanks @michaelrj-google! Let me know if I can assist or review. |
Much appreciated, I'll send you a review once I have something ready. Right now it looks like there's two main issues: the |
Another followup fix to llvm#121215 The new cmake wouldn't define the readerat all if the target wasn't GPU or didn't have a definition of FILE. This patch rewrites the cmake to be more general. As a followup, I'd like to make `use_system_file` consistent between /test and /src. Currently in /src it includes the `COMPILE_OPTIONS` and in /test it does not.
Another followup fix to #121215 The new cmake wouldn't define the readerat all if the target wasn't GPU or didn't have a definition of FILE. This patch rewrites the cmake to be more general. As a followup, I'd like to make `use_system_file` consistent between /test and /src. Currently in /src it includes the `COMPILE_OPTIONS` and in /test it does not.
Resolves #115394
getc
ungetc
toreader.h
..h
reader.cpp
as it's empty nowAlso, full build doesn't yet build on macos m1 AFAIK