segmentation fault after some modification about regex operation of glibc-2.7
peng yang
peterpy8@gmail.com
Thu Aug 25 23:00:00 GMT 2011
I am a new guy doing glibc dev, so my question may seem silly.
In order to get one transition table just
like(http://lambda.uta.edu/cse5317/notes/node8.html) related to one
regular expression. I add an interface
“int regtrtable (const regex_t *__restrict __preg, char * trantable,
int *state_num_ptr);” in regex.h. It compiles well. However, after
make install, lots of segmentation fault come to the screen. And I
don't know how to rescue my OS. Besides, my modification patch will be
added below. I hope you guys can help me figure out that some mistakes
there may be.
diff -r ace192926b61 -r b5b99be08504 posix/regex.h
--- a/posix/regex.h Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex.h Tue Aug 23 02:39:37 2011 -0400
@@ -533,6 +533,11 @@
# endif
#endif
+/* Added by peter for opencl acceleration */
+extern int regtrtable (const regex_t *__restrict __preg, char *
trantable, int *state_num_ptr);
+
+extern int freetrtable (char * trantable);
+
/* POSIX compatibility. */
extern int regcomp (regex_t *__restrict __preg,
const char *__restrict __pattern,
diff -r ace192926b61 -r b5b99be08504 posix/regex_internal.c
--- a/posix/regex_internal.c Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex_internal.c Tue Aug 23 02:39:37 2011 -0400
@@ -30,6 +30,52 @@
unsigned int context,
unsigned int hash) internal_function;
+/* Function to build the global transition table */
+linklist * init_state_linklist(void)
+{
+ linklist *p;
+ p = (linklist *)malloc(sizeof(linklist));
+ p ->next = NULL;
+ r = p;
+ return p;
+}
+
+void insert_state_linklist(re_dfastate_t *state)
+{
+ cur_state_linklist = (linklist *)malloc(sizeof(linklist));
+ cur_state_linklist -> state = state;
+ cur_state_linklist -> next = NULL;
+ r -> next = cur_state_linklist;
+ r = cur_state_linklist;
+ return;
+}
+
+void destroy_state_linklist(void)
+{
+ linklist *p = state_linklist_head, *q = p -> next;
+ while(q != NULL)
+ {
+ free(p);
+ p = q;
+ q = p -> next;
+ }
+ free(p);
+
+}
+
+void disp_state_linklist(void)
+{
+ linklist *p = state_linklist_head->next;
+ printf("\nstate_id_size = %d.", state_id_count);
+
+ while(p != NULL)
+ {
+ printf("\nstateid: %d", p->state->state_id);
+ p = p->next;
+ }
+ return;
+}
+
/* Functions for string operation. */
/* This function allocate the buffers. It is necessary to call
@@ -1488,6 +1534,7 @@
if (BE (new_state == NULL, 0))
*err = REG_ESPACE;
+ insert_state_linklist(new_state);
return new_state;
}
@@ -1531,6 +1578,7 @@
if (BE (new_state == NULL, 0))
*err = REG_ESPACE;
+ insert_state_linklist(new_state);
return new_state;
}
@@ -1611,6 +1659,7 @@
}
newstate->entrance_nodes = &newstate->nodes;
+ newstate->state_id = get_state_id();
for (i = 0 ; i < nodes->nelem ; i++)
{
re_token_t *node = dfa->nodes + nodes->elems[i];
@@ -1662,6 +1711,7 @@
newstate->context = context;
newstate->entrance_nodes = &newstate->nodes;
+ newstate->state_id = get_state_id();
for (i = 0 ; i < nodes->nelem ; i++)
{
diff -r ace192926b61 -r b5b99be08504 posix/regex_internal.h
--- a/posix/regex_internal.h Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex_internal.h Tue Aug 23 02:39:37 2011 -0400
@@ -497,6 +497,7 @@
re_node_set inveclosure;
re_node_set *entrance_nodes;
struct re_dfastate_t **trtable, **word_trtable;
+ unsigned short int state_id;
unsigned int context : 4;
unsigned int halt : 1;
/* If this state can accept `multi byte'.
@@ -509,6 +510,18 @@
};
typedef struct re_dfastate_t re_dfastate_t;
+typedef struct lnode
+{
+ re_dfastate_t *state;
+ unsigned short int trtable[256];
+ unsigned short int final; /*final == 1, lnode is final state. */
+ struct lnode *next;
+}linklist;
+
+unsigned int state_id_count;
+linklist *state_linklist_head;
+linklist *r = NULL , *cur_state_linklist = NULL;
+
struct re_state_table_entry
{
int num;
@@ -684,6 +697,21 @@
} opr;
} bracket_elem_t;
+static inline void init_state_id_count(void)
+{
+ state_id_count = 0;
+}
+
+static inline unsigned int get_state_id(void)
+{
+ state_id_count++;
+ return state_id_count;
+}
+
+linklist * init_state_linklist(void);
+void insert_state_linklist(re_dfastate_t * state);
+void destroy_state_linklist(void);
+void disp_state_linklist(void);
/* Inline functions for bitset operation. */
static inline void
diff -r ace192926b61 -r b5b99be08504 posix/regexec.c
--- a/posix/regexec.c Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regexec.c Tue Aug 23 02:39:37 2011 -0400
@@ -41,6 +41,8 @@
int start, int range, int stop,
size_t nmatch, regmatch_t pmatch[],
int eflags) internal_function;
+static reg_errcode_t regtrtable_internal (const regex_t *preg,
+ char * trantable, int *state_num_ptr) internal_function;
static int re_search_2_stub (struct re_pattern_buffer *bufp,
const char *string1, int length1,
const char *string2, int length2,
@@ -127,6 +129,10 @@
static re_dfastate_t *transit_state (reg_errcode_t *err,
re_match_context_t *mctx,
re_dfastate_t *state) internal_function;
+static re_dfastate_t *transit_state_by_char (reg_errcode_t *err,
+ re_dfa_t *dfa,
+ unsigned char ch,
+ re_dfastate_t *state) internal_function;
static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
re_match_context_t *mctx,
re_dfastate_t *next_state)
@@ -200,7 +206,38 @@
internal_function;
static reg_errcode_t extend_buffers (re_match_context_t *mctx)
internal_function;
-
+
+/* Added by peter for opencl acceleration
+ * return 0: success
+ * return 1: non-success
+ * */
+int
+regtrtable (preg, trantable, state_num_ptr)
+ const regex_t *__restrict preg;
+ char * trantable;
+ int * state_num_ptr;
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+
+ __libc_lock_lock (dfa->lock);
+
+ err = regtrtable_internal(preg, trantable, state_num_ptr);
+
+ __libc_lock_unlock (dfa->lock);
+
+ return err != REG_NOERROR;
+}
+
+int
+freetrtable (trantable)
+ char * trantable;
+{
+ free(trantable);
+ return 0;
+}
+
+
/* Entry point for POSIX code. */
/* regexec searches for a given pattern, specified by PREG, in the
@@ -607,6 +644,51 @@
/* Internal entry point. */
+static reg_errcode_t
+regtrtable_internal(preg, trantable, state_num_ptr)
+ const regex_t *preg;
+ char * trantable;
+ int * state_num_ptr;
+{
+ unsigned char ch = 0;
+ int i, item_size;
+ reg_errcode_t err;
+ re_dfastate_t * cur_state, * new_state;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ err = REG_NOERROR;
+ state_linklist_head = init_state_linklist();
+
+ cur_state = dfa->init_state;
+ insert_state_linklist(cur_state);
+
+ while(cur_state_linklist -> next != NULL)
+ {
+ for(i = 0; i < 256; i++)
+ {
+ new_state = transit_state_by_char(&err, dfa, ch, cur_state);
+
+ cur_state_linklist->trtable[ch] = new_state->state_id;
+ cur_state_linklist->final =
cur_state_linklist->state->halt ? 1 : 0;
+ ch ++;
+ }
+
+ cur_state = cur_state_linklist -> next -> state;
+ }
+ state_num_ptr[0] = state_id_count;
+
+ item_size = 257 * sizeof(unsigned short int);
+ trantable = (char *)malloc(state_id_count * item_size);
+
+ cur_state_linklist = state_linklist_head -> next;
+ for(i=0; i< state_id_count; i++)
+ {
+ memcpy(trantable + i * item_size,
cur_state_linklist->trtable, item_size);
+ cur_state_linklist = cur_state_linklist -> next;
+ }
+
+ destroy_state_linklist();
+}
+
/* Searches for a compiled pattern PREG in the string STRING, whose
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
mingings with regexec. START, and RANGE have the same meanings
@@ -2295,6 +2377,31 @@
}
}
+static re_dfastate_t *
+internal_function
+transit_state_by_char (reg_errcode_t *err,
+ re_dfa_t *dfa,
+ unsigned char ch,
+ re_dfastate_t *state)
+{
+ re_dfastate_t **trtable;
+
+ /* Use transition table */
+ for (;;)
+ {
+ trtable = state->trtable;
+ if (BE (trtable != NULL, 1))
+ return trtable[ch];
+
+ if (!build_trtable (dfa, state))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ /* Retry, we now have a transition table. */
+ }
+}
/* Update the state_log if we need */
re_dfastate_t *
internal_function
More information about the Libc-help
mailing list