2 * token.c -- tokenize strings, a la strtok(3)
4 * Copyright (C) 2007 Oracle. All rights reserved.
5 * Copyright (C) 2007 Chuck Lever <chuck.lever@oracle.com>
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public
18 * License along with this program; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
25 * We've constructed a simple string tokenizer that is better than
26 * strtok(3) in several ways:
28 * 1. It doesn't interfere with ongoing tokenizations using strtok(3).
29 * 2. It's re-entrant so we can nest tokenizations, if needed.
30 * 3. It can handle double-quoted delimiters (needed for 'context="sd,fslj"').
31 * 4. It doesn't alter the string we're tokenizing, so it can work
32 * on write-protected strings as well as writable strings.
46 struct tokenizer_state {
52 static void find_next_nondelimiter(struct tokenizer_state *tstate)
54 while (*tstate->pos != '\0' && *tstate->pos == tstate->delimiter)
58 static size_t find_next_delimiter(struct tokenizer_state *tstate)
63 while (*tstate->pos != '\0') {
64 if (*tstate->pos == '"')
67 if (!quote_seen && *tstate->pos == tstate->delimiter)
74 /* did the string terminate before the close quote? */
76 tstate->error = EINVAL;
84 * next_token - find the next token in a string and return it
85 * @tstate: pointer to tokenizer context object
87 * Returns the next token found in the current string.
88 * Returns NULL if there are no more tokens in the string,
89 * or if an error occurs.
91 * Side effect: tstate is updated
93 char *next_token(struct tokenizer_state *tstate)
98 if (!tstate || !tstate->pos || tstate->error)
101 find_next_nondelimiter(tstate);
102 if (*tstate->pos == '\0')
106 len = find_next_delimiter(tstate);
108 token = strndup(token, len);
111 tstate->error = ENOMEM;
116 return NULL; /* no tokens found in this string */
120 * init_tokenizer - return an initialized tokenizer context object
121 * @string: pointer to C string
122 * @delimiter: single character that delimits tokens in @string
124 * Returns an initialized tokenizer context object
126 struct tokenizer_state *init_tokenizer(char *string, char delimiter)
128 struct tokenizer_state *tstate;
130 tstate = malloc(sizeof(*tstate));
132 tstate->pos = string;
133 tstate->delimiter = delimiter;
140 * tokenizer_error - digs error value out of tokenizer context
141 * @tstate: pointer to tokenizer context object
144 int tokenizer_error(struct tokenizer_state *tstate)
146 return tstate ? tstate->error : 0;
150 * end_tokenizer - free a tokenizer context object
151 * @tstate: pointer to tokenizer context object
154 void end_tokenizer(struct tokenizer_state *tstate)