@@ -70,7 +70,7 @@ func (r *Reader) ReadToEnd(ctx context.Context) {
70
70
// SectionReader can only read a fixed window (from previous offset to EOF).
71
71
info , err := r .file .Stat ()
72
72
if err != nil {
73
- r .set .Logger .Error ("Failed to stat" , zap .Error (err ))
73
+ r .set .Logger .Error ("failed to stat" , zap .Error (err ))
74
74
return
75
75
}
76
76
currentEOF := info .Size ()
@@ -80,7 +80,7 @@ func (r *Reader) ReadToEnd(ctx context.Context) {
80
80
gzipReader , err := gzip .NewReader (io .NewSectionReader (r .file , r .Offset , currentEOF ))
81
81
if err != nil {
82
82
if ! errors .Is (err , io .EOF ) {
83
- r .set .Logger .Error ("Failed to create gzip reader" , zap .Error (err ))
83
+ r .set .Logger .Error ("failed to create gzip reader" , zap .Error (err ))
84
84
}
85
85
return
86
86
} else {
@@ -96,7 +96,7 @@ func (r *Reader) ReadToEnd(ctx context.Context) {
96
96
}
97
97
98
98
if _ , err := r .file .Seek (r .Offset , 0 ); err != nil {
99
- r .set .Logger .Error ("Failed to seek" , zap .Error (err ))
99
+ r .set .Logger .Error ("failed to seek" , zap .Error (err ))
100
100
return
101
101
}
102
102
@@ -106,9 +106,90 @@ func (r *Reader) ReadToEnd(ctx context.Context) {
106
106
}
107
107
}()
108
108
109
+ doneReadingFile := r .readHeader (ctx )
110
+ if doneReadingFile {
111
+ return
112
+ }
113
+
114
+ // Reset the file position to r.Offset, since the header scanner may have moved it past a content token.
115
+ if _ , err := r .file .Seek (r .Offset , 0 ); err != nil {
116
+ r .set .Logger .Error ("failed to seek post-header" , zap .Error (err ))
117
+ return
118
+ }
119
+
120
+ r .readContents (ctx )
121
+ }
122
+
123
+ func (r * Reader ) readHeader (ctx context.Context ) (doneReadingFile bool ) {
124
+ if r .headerReader == nil {
125
+ r .set .Logger .Debug ("no need to read header" , zap .Bool ("header_finalized" , r .HeaderFinalized ))
126
+ return false
127
+ }
128
+
109
129
s := scanner .New (r , r .maxLogSize , r .initialBufferSize , r .Offset , r .splitFunc )
110
130
111
- // Iterate over the tokenized file, emitting entries as we go
131
+ // Read the tokens from the file until no more header tokens are found or the end of file is reached.
132
+ for {
133
+ select {
134
+ case <- ctx .Done ():
135
+ return true
136
+ default :
137
+ }
138
+
139
+ ok := s .Scan ()
140
+ if ! ok {
141
+ if err := s .Error (); err != nil {
142
+ r .set .Logger .Error ("failed during header scan" , zap .Error (err ))
143
+ } else {
144
+ r .set .Logger .Debug ("end of file reached" , zap .Bool ("delete_at_eof" , r .deleteAtEOF ))
145
+ if r .deleteAtEOF {
146
+ r .delete ()
147
+ }
148
+ }
149
+ // Either end of file was reached, or file cannot be scanned.
150
+ return true
151
+ }
152
+
153
+ token , err := r .decoder .Decode (s .Bytes ())
154
+ if err != nil {
155
+ r .set .Logger .Error ("decode header: %w" , zap .Error (err ))
156
+ r .Offset = s .Pos () // move past the bad token or we may be stuck
157
+ continue
158
+ }
159
+
160
+ err = r .headerReader .Process (ctx , token , r .FileAttributes )
161
+ if err != nil {
162
+ if errors .Is (err , header .ErrEndOfHeader ) {
163
+ // End of header reached.
164
+ break
165
+ } else {
166
+ r .set .Logger .Error ("process header: %w" , zap .Error (err ))
167
+ }
168
+ }
169
+
170
+ r .Offset = s .Pos ()
171
+ }
172
+
173
+ // Clean up the header machinery
174
+ if err := r .headerReader .Stop (); err != nil {
175
+ r .set .Logger .Error ("failed to stop header pipeline during finalization" , zap .Error (err ))
176
+ }
177
+ r .headerReader = nil
178
+ r .HeaderFinalized = true
179
+ r .initialBufferSize = scanner .DefaultBufferSize
180
+
181
+ // Switch to the normal split and process functions.
182
+ r .splitFunc = r .lineSplitFunc
183
+ r .processFunc = r .emitFunc
184
+
185
+ return false
186
+ }
187
+
188
+ func (r * Reader ) readContents (ctx context.Context ) {
189
+ // Create the scanner to read the contents of the file.
190
+ s := scanner .New (r , r .maxLogSize , r .initialBufferSize , r .Offset , r .splitFunc )
191
+
192
+ // Iterate over the contents of the file.
112
193
for {
113
194
select {
114
195
case <- ctx .Done ():
@@ -119,7 +200,7 @@ func (r *Reader) ReadToEnd(ctx context.Context) {
119
200
ok := s .Scan ()
120
201
if ! ok {
121
202
if err := s .Error (); err != nil {
122
- r .set .Logger .Error ("Failed during scan" , zap .Error (err ))
203
+ r .set .Logger .Error ("failed during scan" , zap .Error (err ))
123
204
} else if r .deleteAtEOF {
124
205
r .delete ()
125
206
}
@@ -139,36 +220,11 @@ func (r *Reader) ReadToEnd(ctx context.Context) {
139
220
}
140
221
141
222
err = r .processFunc (ctx , token , r .FileAttributes )
142
- if err == nil {
143
- r .Offset = s .Pos () // successful emit, update offset
144
- continue
145
- }
146
-
147
- if ! errors .Is (err , header .ErrEndOfHeader ) {
223
+ if err != nil {
148
224
r .set .Logger .Error ("process: %w" , zap .Error (err ))
149
- r .Offset = s .Pos () // move past the bad token or we may be stuck
150
- continue
151
225
}
152
226
153
- // Clean up the header machinery
154
- if err = r .headerReader .Stop (); err != nil {
155
- r .set .Logger .Error ("Failed to stop header pipeline during finalization" , zap .Error (err ))
156
- }
157
- r .headerReader = nil
158
- r .HeaderFinalized = true
159
-
160
- // Switch to the normal split and process functions.
161
- r .splitFunc = r .lineSplitFunc
162
- r .processFunc = r .emitFunc
163
-
164
- // Recreate the scanner with the normal split func.
165
- // Do not use the updated offset from the old scanner, as the most recent token
166
- // could be split differently with the new splitter.
167
- if _ , err = r .file .Seek (r .Offset , 0 ); err != nil {
168
- r .set .Logger .Error ("Failed to seek post-header" , zap .Error (err ))
169
- return
170
- }
171
- s = scanner .New (r , r .maxLogSize , scanner .DefaultBufferSize , r .Offset , r .splitFunc )
227
+ r .Offset = s .Pos ()
172
228
}
173
229
}
174
230
0 commit comments