aboutsummaryrefslogtreecommitdiff
path: root/gl/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regexec.c')
-rw-r--r--gl/regexec.c161
1 files changed, 85 insertions, 76 deletions
diff --git a/gl/regexec.c b/gl/regexec.c
index 5452ef78..dc449ce5 100644
--- a/gl/regexec.c
+++ b/gl/regexec.c
@@ -1,6 +1,6 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
- Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
+ Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -637,7 +637,7 @@ re_exec (s)
(0 <= LAST_START && LAST_START <= LENGTH) */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
re_search_internal (const regex_t *preg,
const char *string, Idx length,
Idx start, Idx last_start, Idx stop,
@@ -833,10 +833,10 @@ re_search_internal (const regex_t *preg,
break;
match_first += incr;
if (match_first < left_lim || match_first > right_lim)
- {
- err = REG_NOMATCH;
- goto free_return;
- }
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
}
break;
}
@@ -953,14 +953,14 @@ re_search_internal (const regex_t *preg,
}
if (dfa->subexp_map)
- for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
- if (dfa->subexp_map[reg_idx] != reg_idx)
- {
- pmatch[reg_idx + 1].rm_so
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
- pmatch[reg_idx + 1].rm_eo
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
- }
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
}
free_return:
@@ -972,7 +972,7 @@ re_search_internal (const regex_t *preg,
}
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
prune_impossible_nodes (re_match_context_t *mctx)
{
const re_dfa_t *const dfa = mctx->dfa;
@@ -1110,7 +1110,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
index of the buffer. */
static Idx
-internal_function
+internal_function __attribute_warn_unused_result__
check_matching (re_match_context_t *mctx, bool fl_longest_match,
Idx *p_match_first)
{
@@ -1149,7 +1149,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
{
err = transit_state_bkref (mctx, &cur_state->nodes);
if (BE (err != REG_NOERROR, 0))
- return err;
+ return err;
}
}
}
@@ -1176,16 +1176,16 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
if (BE (next_char_idx >= mctx->input.bufs_len, 0)
- || (BE (next_char_idx >= mctx->input.valid_len, 0)
- && mctx->input.valid_len < mctx->input.len))
- {
- err = extend_buffers (mctx);
- if (BE (err != REG_NOERROR, 0))
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
return REG_ERROR;
}
- }
+ }
cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL)
@@ -1309,17 +1309,17 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
if (dest_node == REG_MISSING)
dest_node = candidate;
- else
+ else
{
/* In order to avoid infinite loop like "(a*)*", return the second
- epsilon-transition if the first was already considered. */
+ epsilon-transition if the first was already considered. */
if (re_node_set_contains (eps_via_nodes, dest_node))
- return candidate;
+ return candidate;
/* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
- eps_via_nodes))
+ eps_via_nodes))
return REG_ERROR;
/* We know we are going to exit. */
@@ -1385,7 +1385,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
}
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
{
@@ -1432,7 +1432,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
regmatch_t *pmatch, bool fl_backtrack)
{
@@ -1667,7 +1667,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
if (mctx->state_log[str_idx])
{
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
goto free_return;
}
@@ -1686,7 +1686,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
}
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
Idx str_idx, re_node_set *cur_dest)
{
@@ -1848,7 +1848,7 @@ update_cur_sifted_state (const re_match_context_t *mctx,
}
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
const re_node_set *candidates)
{
@@ -1863,10 +1863,14 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
{
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
if (BE (err != REG_NOERROR, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
for (i = 0; i < dest_nodes->nelem; i++)
- re_node_set_merge (&state->inveclosure,
- dfa->inveclosures + dest_nodes->elems[i]);
+ {
+ err = re_node_set_merge (&state->inveclosure,
+ dfa->inveclosures + dest_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ }
}
return re_node_set_add_intersect (dest_nodes, candidates,
&state->inveclosure);
@@ -1978,7 +1982,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
{
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do
- {
+ {
Idx dst;
int cpos;
@@ -2000,9 +2004,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
if (dst == from_node)
{
if (boundaries & 1)
- return -1;
+ return -1;
else /* if (boundaries & 2) */
- return 0;
+ return 0;
}
cpos =
@@ -2016,7 +2020,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
if (subexp_idx < BITSET_WORD_BITS)
ent->eps_reachable_subexps_map
&= ~((bitset_word_t) 1 << subexp_idx);
- }
+ }
while (ent++->more);
}
break;
@@ -2158,7 +2162,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
}
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
Idx str_idx, const re_node_set *candidates)
{
@@ -2241,7 +2245,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
re_node_set_remove (&local_sctx.limits, enabled_idx);
/* mctx->bkref_ents may have changed, reload the pointer. */
- entry = mctx->bkref_ents + enabled_idx;
+ entry = mctx->bkref_ents + enabled_idx;
}
while (enabled_idx++, entry++->more);
}
@@ -2288,7 +2292,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
update the destination of STATE_LOG. */
static re_dfastate_t *
-internal_function
+internal_function __attribute_warn_unused_result__
transit_state (reg_errcode_t *err, re_match_context_t *mctx,
re_dfastate_t *state)
{
@@ -2322,7 +2326,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
trtable = state->word_trtable;
if (BE (trtable != NULL, 1))
- {
+ {
unsigned int context;
context
= re_string_context_at (&mctx->input,
@@ -2368,21 +2372,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
unsigned int context;
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
- the destination of a multibyte char/collating element/
- back reference. Then the next state is the union set of
- these destinations and the results of the transition table. */
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
pstate = mctx->state_log[cur_idx];
log_nodes = pstate->entrance_nodes;
if (next_state != NULL)
- {
- table_nodes = next_state->entrance_nodes;
- *err = re_node_set_init_union (&next_nodes, table_nodes,
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
- if (BE (*err != REG_NOERROR, 0))
+ if (BE (*err != REG_NOERROR, 0))
return NULL;
- }
+ }
else
- next_nodes = *log_nodes;
+ next_nodes = *log_nodes;
/* Note: We already add the nodes of the initial state,
then we don't need to add them here. */
@@ -2390,12 +2394,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
re_string_cur_idx (&mctx->input) - 1,
mctx->eflags);
next_state = mctx->state_log[cur_idx]
- = re_acquire_state_context (err, dfa, &next_nodes, context);
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
+ this function is next_state and ERR is already set. */
if (table_nodes != NULL)
- re_node_set_free (&next_nodes);
+ re_node_set_free (&next_nodes);
}
if (BE (dfa->nbackref, 0) && next_state != NULL)
@@ -2436,9 +2440,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
do
{
- if (++cur_str_idx > max)
- return NULL;
- re_string_skip_bytes (&mctx->input, 1);
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
}
while (mctx->state_log[cur_str_idx] == NULL);
@@ -2546,7 +2550,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
re_dfastate_t *dest_state;
if (!dfa->nodes[cur_node_idx].accept_mb)
- continue;
+ continue;
if (dfa->nodes[cur_node_idx].constraint)
{
@@ -2714,7 +2718,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
delay these checking for prune_impossible_nodes(). */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
{
const re_dfa_t *const dfa = mctx->dfa;
@@ -2727,7 +2731,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx;
do
- if (entry->node == bkref_node)
+ if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */
while (entry++->more);
}
@@ -2915,7 +2919,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
Idx top_str, Idx last_node, Idx last_str, int type)
{
@@ -3077,7 +3081,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
Can't we unify them? */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
re_node_set *cur_nodes, re_node_set *next_nodes)
{
@@ -3211,7 +3215,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
problematic append it to DST_NODES. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
Idx target, Idx ex_subexp, int type)
{
@@ -3256,7 +3260,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
in MCTX->BKREF_ENTS. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
Idx cur_str, Idx subexp_num, int type)
{
@@ -3622,7 +3626,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
}
#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
- {
+ {
if (ASCII_CHARS % BITSET_WORD_BITS == 0)
memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
else
@@ -3631,7 +3635,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
bitset_clear (accepts, '\n');
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
- }
+ }
#endif
else
continue;
@@ -3836,7 +3840,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
if (node->type == OP_PERIOD)
{
if (char_len <= 1)
- return 0;
+ return 0;
/* FIXME: I don't think this if is needed, as both '\n'
and '\0' are char_len == 1. */
/* '.' accepts any one character except the following two cases. */
@@ -3949,15 +3953,20 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
- idx = findidx (&cp);
+ int32_t idx = findidx (&cp);
if (idx > 0)
for (i = 0; i < cset->nequiv_classes; ++i)
{
int32_t equiv_class_idx = cset->equiv_classes[i];
- size_t weight_len = weights[idx];
- if (weight_len == weights[equiv_class_idx])
+ size_t weight_len = weights[idx & 0xffffff];
+ if (weight_len == weights[equiv_class_idx & 0xffffff]
+ && (idx >> 24) == (equiv_class_idx >> 24))
{
Idx cnt = 0;
+
+ idx &= 0xffffff;
+ equiv_class_idx &= 0xffffff;
+
while (cnt <= weight_len
&& (weights[equiv_class_idx + 1 + cnt]
== weights[idx + 1 + cnt]))
@@ -4123,7 +4132,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
/* Extend the buffers, if the buffers have run out. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
extend_buffers (re_match_context_t *mctx)
{
reg_errcode_t ret;
@@ -4186,7 +4195,7 @@ extend_buffers (re_match_context_t *mctx)
/* Initialize MCTX. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
{
mctx->eflags = eflags;
@@ -4266,7 +4275,7 @@ match_ctx_free (re_match_context_t *mctx)
*/
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
Idx to)
{
@@ -4338,7 +4347,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
at STR_IDX. */
static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
{
#ifdef DEBUG