9 |
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
10 |
*/ |
*/ |
11 |
|
|
12 |
|
/* for getline() [GNUism] |
13 |
|
#ifndef _GNU_SOURCE |
14 |
|
#define _GNU_SOURCE 1 |
15 |
|
#endif |
16 |
|
*/ |
17 |
#include "libbb.h" |
#include "libbb.h" |
18 |
|
|
19 |
/* This function reads an entire line from a text file, up to a newline |
/* This function reads an entire line from a text file, up to a newline |
20 |
* or NUL byte, inclusive. It returns a malloc'ed char * which must be |
* or NUL byte, inclusive. It returns a malloc'ed char * which |
21 |
* stored and free'ed by the caller. If end is NULL '\n' isn't considered |
* must be free'ed by the caller. If end is NULL '\n' isn't considered |
22 |
* end of line. If end isn't NULL, length of the chunk read is stored in it. |
* end of line. If end isn't NULL, length of the chunk is stored in it. |
23 |
* Return NULL if EOF/error */ |
* If lineno is not NULL, *lineno is incremented for each line, |
24 |
|
* and also trailing '\' is recognized as line continuation. |
25 |
char *bb_get_chunk_from_file(FILE * file, int *end) |
* |
26 |
|
* Returns NULL if EOF/error. */ |
27 |
|
char* FAST_FUNC bb_get_chunk_with_continuation(FILE *file, int *end, int *lineno) |
28 |
{ |
{ |
29 |
int ch; |
int ch; |
30 |
int idx = 0; |
int idx = 0; |
34 |
while ((ch = getc(file)) != EOF) { |
while ((ch = getc(file)) != EOF) { |
35 |
/* grow the line buffer as necessary */ |
/* grow the line buffer as necessary */ |
36 |
if (idx >= linebufsz) { |
if (idx >= linebufsz) { |
37 |
linebuf = xrealloc(linebuf, linebufsz += 80); |
linebufsz += 256; |
38 |
|
linebuf = xrealloc(linebuf, linebufsz); |
39 |
} |
} |
40 |
linebuf[idx++] = (char) ch; |
linebuf[idx++] = (char) ch; |
41 |
if (!ch || (end && ch == '\n')) |
if (!ch) |
42 |
break; |
break; |
43 |
|
if (end && ch == '\n') { |
44 |
|
if (lineno == NULL) |
45 |
|
break; |
46 |
|
(*lineno)++; |
47 |
|
if (idx < 2 || linebuf[idx-2] != '\\') |
48 |
|
break; |
49 |
|
idx -= 2; |
50 |
|
} |
51 |
} |
} |
52 |
if (end) |
if (end) |
53 |
*end = idx; |
*end = idx; |
58 |
// free(linebuf); |
// free(linebuf); |
59 |
// return NULL; |
// return NULL; |
60 |
//} |
//} |
61 |
linebuf = xrealloc(linebuf, idx+1); |
linebuf = xrealloc(linebuf, idx + 1); |
62 |
linebuf[idx] = '\0'; |
linebuf[idx] = '\0'; |
63 |
} |
} |
64 |
return linebuf; |
return linebuf; |
65 |
} |
} |
66 |
|
|
67 |
|
/* Plain chunk reader: forwards to bb_get_chunk_with_continuation() with
 * lineno == NULL, so no line counter is updated and a trailing '\' is
 * not treated as a line continuation. 'end' is passed through untouched. */
char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end)
{
	char *chunk;

	chunk = bb_get_chunk_with_continuation(file, end, NULL);
	return chunk;
}
71 |
|
|
72 |
/* Get line, including trailing \n if any */ |
/* Get line, including trailing \n if any */ |
73 |
char *xmalloc_fgets(FILE * file) |
char* FAST_FUNC xmalloc_fgets(FILE *file) |
74 |
{ |
{ |
75 |
int i; |
int i; |
76 |
|
|
77 |
return bb_get_chunk_from_file(file, &i); |
return bb_get_chunk_from_file(file, &i); |
78 |
} |
} |
|
|
|
79 |
/* Get line. Remove trailing \n */ |
/* Get line. Remove trailing \n */ |
80 |
char *xmalloc_getline(FILE * file) |
char* FAST_FUNC xmalloc_fgetline(FILE *file) |
81 |
{ |
{ |
82 |
int i; |
int i; |
83 |
char *c = bb_get_chunk_from_file(file, &i); |
char *c = bb_get_chunk_from_file(file, &i); |
87 |
|
|
88 |
return c; |
return c; |
89 |
} |
} |
90 |
|
|
91 |
|
#if 0 |
92 |
|
/* GNUism getline() should be faster (not tested) than a loop with fgetc */ |
93 |
|
|
94 |
|
/* Get line, including trailing \n if any */ |
95 |
|
char* FAST_FUNC xmalloc_fgets(FILE *file) |
96 |
|
{ |
97 |
|
char *res_buf = NULL; |
98 |
|
size_t res_sz; |
99 |
|
|
100 |
|
if (getline(&res_buf, &res_sz, file) == -1) { |
101 |
|
free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */ |
102 |
|
res_buf = NULL; |
103 |
|
} |
104 |
|
//TODO: trimming to res_sz? |
105 |
|
return res_buf; |
106 |
|
} |
107 |
|
/* Get line. Remove trailing \n */ |
108 |
|
char* FAST_FUNC xmalloc_fgetline(FILE *file) |
109 |
|
{ |
110 |
|
char *res_buf = NULL; |
111 |
|
size_t res_sz; |
112 |
|
|
113 |
|
res_sz = getline(&res_buf, &res_sz, file); |
114 |
|
|
115 |
|
if ((ssize_t)res_sz != -1) { |
116 |
|
if (res_buf[res_sz - 1] == '\n') |
117 |
|
res_buf[--res_sz] = '\0'; |
118 |
|
//TODO: trimming to res_sz? |
119 |
|
} else { |
120 |
|
free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */ |
121 |
|
res_buf = NULL; |
122 |
|
} |
123 |
|
return res_buf; |
124 |
|
} |
125 |
|
|
126 |
|
#endif |
127 |
|
|
128 |
|
#if 0 |
129 |
|
/* Faster routines (~twice as fast). +170 bytes. Unused as of 2008-07. |
130 |
|
* |
131 |
|
* NB: they stop at NUL byte too. |
132 |
|
* Performance is important here. Think "grep 50gigabyte_file"... |
133 |
|
* Ironically, grep can't use it because of NUL issue. |
134 |
|
* We sorely need C lib to provide fgets which reports size! |
135 |
|
* |
136 |
|
* Update: |
137 |
|
* Actually, uclibc and glibc have it. man getline. It's GNUism, |
138 |
|
* but very useful one (if it's as fast as this code). |
139 |
|
* TODO: |
140 |
|
* - currently, sed and sort use bb_get_chunk_from_file and heavily |
141 |
|
* depend on its "stop on \n or \0" behavior, and STILL they fail |
142 |
|
* to handle all cases with embedded NULs correctly. So: |
143 |
|
* - audit sed and sort; convert them to getline FIRST. |
144 |
|
* - THEN ditch bb_get_chunk_from_file, replace it with getline. |
145 |
|
* - provide getline implementation for non-GNU systems. |
146 |
|
*/ |
147 |
|
|
148 |
|
/* Read one line from file with fgets(), growing the buffer in 0x100-byte
 * steps. Reading stops after a '\n', when fgets() returns NULL, or when a
 * step yields fewer than 0xff characters.
 *
 * *sizep always receives the number of characters collected.
 * Returns the buffer (not shrunk to fit; that is left to the caller),
 * or NULL, with the buffer freed, if nothing was collected. */
static char* xmalloc_fgets_internal(FILE *file, int *sizep)
{
	enum { STEP = 0x100 };
	char *buf = NULL;
	int used = 0;

	for (;;) {
		int got;

		buf = xrealloc(buf, used + STEP);
		if (fgets(buf + used, STEP, file) == NULL) {
			/* nothing more could be read: terminate what we have */
			buf[used] = '\0';
			break;
		}
		/* fgets does not report how much it stored, so measure it */
		got = strlen(buf + used);
		used += got;
		/* only a completely filled step without '\n' means "more to come" */
		if (got == STEP - 1 && buf[used - 1] != '\n')
			continue;
		break;
	}

	*sizep = used;
	if (used == 0) {
		free(buf);
		return NULL;
	}
	return buf;
}
179 |
|
|
180 |
|
/* Get line, remove trailing \n */ |
181 |
|
char* FAST_FUNC xmalloc_fgetline_fast(FILE *file) |
182 |
|
{ |
183 |
|
int sz; |
184 |
|
char *r = xmalloc_fgets_internal(file, &sz); |
185 |
|
if (r && r[sz - 1] == '\n') |
186 |
|
r[--sz] = '\0'; |
187 |
|
return r; /* not xrealloc(r, sz + 1)! */ |
188 |
|
} |
189 |
|
|
190 |
|
/* Get line, including trailing \n if any.
 * Thin wrapper around xmalloc_fgets_internal(); the reported size is
 * discarded and the buffer is returned without being shrunk. */
char* FAST_FUNC xmalloc_fgets(FILE *file)
{
	int ignored_size;
	char *line = xmalloc_fgets_internal(file, &ignored_size);

	return line;
}
195 |
|
|
196 |
|
/* Get line, remove trailing \n */ |
197 |
|
char* FAST_FUNC xmalloc_fgetline(FILE *file) |
198 |
|
{ |
199 |
|
int sz; |
200 |
|
char *r = xmalloc_fgets_internal(file, &sz); |
201 |
|
if (!r) |
202 |
|
return r; |
203 |
|
if (r[sz - 1] == '\n') |
204 |
|
r[--sz] = '\0'; |
205 |
|
return xrealloc(r, sz + 1); |
206 |
|
} |
207 |
|
#endif |