segmentation fault after some modification about regex operation of glibc-2.7

peng yang peterpy8@gmail.com
Thu Aug 25 23:00:00 GMT 2011


I am new to glibc development, so my question may seem silly.
In order to get a transition table (like the one described at
http://lambda.uta.edu/cse5317/notes/node8.html) for a given regular
expression, I added an interface
"int regtrtable (const regex_t *__restrict __preg, char * trantable,
int *state_num_ptr);" to regex.h. It compiles fine. However, after
running "make install", lots of segmentation faults appeared on the
screen, and I don't know how to rescue my OS. My patch is included
below. I hope you can help me figure out what mistakes it contains.

diff -r ace192926b61 -r b5b99be08504 posix/regex.h
--- a/posix/regex.h     Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex.h     Tue Aug 23 02:39:37 2011 -0400
@@ -533,6 +533,11 @@
 # endif
 #endif

+/* Added by peter for opencl acceleration */
+extern int regtrtable (const regex_t *__restrict __preg, char * trantable, int *state_num_ptr);
+
+extern int freetrtable (char * trantable);
+
 /* POSIX compatibility.  */
 extern int regcomp (regex_t *__restrict __preg,
                   const char *__restrict __pattern,
diff -r ace192926b61 -r b5b99be08504 posix/regex_internal.c
--- a/posix/regex_internal.c    Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex_internal.c    Tue Aug 23 02:39:37 2011 -0400
@@ -30,6 +30,52 @@
                                         unsigned int context,
                                         unsigned int hash) internal_function;

+/* Function to build the global transition table */
+linklist * init_state_linklist(void)
+{
+    linklist *p;
+    p = (linklist *)malloc(sizeof(linklist));
+    p ->next = NULL;
+    r = p;
+    return p;
+}
+
+void insert_state_linklist(re_dfastate_t *state)
+{
+    cur_state_linklist = (linklist *)malloc(sizeof(linklist));
+    cur_state_linklist -> state = state;
+    cur_state_linklist -> next = NULL;
+    r -> next = cur_state_linklist;
+    r = cur_state_linklist;
+    return;
+}
+
+void destroy_state_linklist(void)
+{
+    linklist *p = state_linklist_head, *q = p -> next;
+    while(q != NULL)
+    {
+        free(p);
+        p = q;
+        q = p -> next;
+    }
+    free(p);
+
+}
+
+void disp_state_linklist(void)
+{
+    linklist *p = state_linklist_head->next;
+    printf("\nstate_id_size = %d.", state_id_count);
+
+    while(p != NULL)
+    {
+        printf("\nstateid: %d", p->state->state_id);
+        p = p->next;
+    }
+    return;
+}
+
 /* Functions for string operation.  */

 /* This function allocate the buffers.  It is necessary to call
@@ -1488,6 +1534,7 @@
  if (BE (new_state == NULL, 0))
    *err = REG_ESPACE;

+  insert_state_linklist(new_state);
  return new_state;
 }

@@ -1531,6 +1578,7 @@
  if (BE (new_state == NULL, 0))
    *err = REG_ESPACE;

+  insert_state_linklist(new_state);
  return new_state;
 }

@@ -1611,6 +1659,7 @@
    }

  newstate->entrance_nodes = &newstate->nodes;
+  newstate->state_id = get_state_id();
  for (i = 0 ; i < nodes->nelem ; i++)
    {
      re_token_t *node = dfa->nodes + nodes->elems[i];
@@ -1662,6 +1711,7 @@

  newstate->context = context;
  newstate->entrance_nodes = &newstate->nodes;
+  newstate->state_id = get_state_id();

  for (i = 0 ; i < nodes->nelem ; i++)
    {
diff -r ace192926b61 -r b5b99be08504 posix/regex_internal.h
--- a/posix/regex_internal.h    Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex_internal.h    Tue Aug 23 02:39:37 2011 -0400
@@ -497,6 +497,7 @@
  re_node_set inveclosure;
  re_node_set *entrance_nodes;
  struct re_dfastate_t **trtable, **word_trtable;
+  unsigned short int state_id;
  unsigned int context : 4;
  unsigned int halt : 1;
  /* If this state can accept `multi byte'.
@@ -509,6 +510,18 @@
 };
 typedef struct re_dfastate_t re_dfastate_t;

+typedef struct lnode
+{
+    re_dfastate_t *state;
+    unsigned short int trtable[256];
+    unsigned short int final; /*final == 1, lnode is final state. */
+    struct lnode *next;
+}linklist;
+
+unsigned int state_id_count;
+linklist *state_linklist_head;
+linklist *r = NULL , *cur_state_linklist = NULL;
+
 struct re_state_table_entry
 {
  int num;
@@ -684,6 +697,21 @@
  } opr;
 } bracket_elem_t;

+static inline void init_state_id_count(void)
+{
+    state_id_count = 0;
+}
+
+static inline unsigned int get_state_id(void)
+{
+    state_id_count++;
+    return state_id_count;
+}
+
+linklist * init_state_linklist(void);
+void insert_state_linklist(re_dfastate_t * state);
+void destroy_state_linklist(void);
+void disp_state_linklist(void);

 /* Inline functions for bitset operation.  */
 static inline void
diff -r ace192926b61 -r b5b99be08504 posix/regexec.c
--- a/posix/regexec.c   Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regexec.c   Tue Aug 23 02:39:37 2011 -0400
@@ -41,6 +41,8 @@
                                        int start, int range, int stop,
                                        size_t nmatch, regmatch_t pmatch[],
                                        int eflags) internal_function;
+static reg_errcode_t regtrtable_internal (const regex_t *preg,
+                     char * trantable, int *state_num_ptr) internal_function;
 static int re_search_2_stub (struct re_pattern_buffer *bufp,
                            const char *string1, int length1,
                            const char *string2, int length2,
@@ -127,6 +129,10 @@
 static re_dfastate_t *transit_state (reg_errcode_t *err,
                                    re_match_context_t *mctx,
                                    re_dfastate_t *state) internal_function;
+static re_dfastate_t *transit_state_by_char (reg_errcode_t *err,
+                     re_dfa_t *dfa,
+                     unsigned char ch,
+                     re_dfastate_t *state) internal_function;
 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
                                           re_match_context_t *mctx,
                                           re_dfastate_t *next_state)
@@ -200,7 +206,38 @@
     internal_function;
 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
     internal_function;
-
+
+/* Added by peter for opencl acceleration
+ * return 0: success
+ * return 1: non-success
+ * */
+int
+regtrtable (preg, trantable, state_num_ptr)
+    const regex_t *__restrict preg;
+    char * trantable;
+    int * state_num_ptr;
+{
+    reg_errcode_t err;
+    re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+
+    __libc_lock_lock (dfa->lock);
+
+    err = regtrtable_internal(preg, trantable, state_num_ptr);
+
+    __libc_lock_unlock (dfa->lock);
+
+   return err != REG_NOERROR;
+}
+
+int
+freetrtable (trantable)
+    char * trantable;
+{
+    free(trantable);
+    return 0;
+}
+
+
 /* Entry point for POSIX code.  */

 /* regexec searches for a given pattern, specified by PREG, in the
@@ -607,6 +644,51 @@

 /* Internal entry point.  */

+static reg_errcode_t
+regtrtable_internal(preg, trantable, state_num_ptr)
+    const regex_t *preg;
+    char * trantable;
+    int * state_num_ptr;
+{
+    unsigned char ch = 0;
+    int i, item_size;
+    reg_errcode_t err;
+    re_dfastate_t * cur_state, * new_state;
+    re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+    err = REG_NOERROR;
+    state_linklist_head = init_state_linklist();
+
+    cur_state = dfa->init_state;
+    insert_state_linklist(cur_state);
+
+    while(cur_state_linklist -> next != NULL)
+    {
+        for(i = 0; i < 256; i++)
+        {
+            new_state = transit_state_by_char(&err, dfa, ch, cur_state);
+
+            cur_state_linklist->trtable[ch] = new_state->state_id;
+            cur_state_linklist->final = cur_state_linklist->state->halt ? 1 : 0;
+            ch ++;
+        }
+
+        cur_state = cur_state_linklist -> next -> state;
+    }
+    state_num_ptr[0] = state_id_count;
+
+    item_size = 257 * sizeof(unsigned short int);
+    trantable = (char *)malloc(state_id_count * item_size);
+
+    cur_state_linklist = state_linklist_head -> next;
+    for(i=0; i< state_id_count; i++)
+    {
+        memcpy(trantable + i * item_size, cur_state_linklist->trtable, item_size);
+        cur_state_linklist = cur_state_linklist -> next;
+    }
+
+    destroy_state_linklist();
+}
+
 /* Searches for a compiled pattern PREG in the string STRING, whose
   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
   mingings with regexec.  START, and RANGE have the same meanings
@@ -2295,6 +2377,31 @@
    }
 }

+static re_dfastate_t *
+internal_function
+transit_state_by_char (reg_errcode_t *err,
+                       re_dfa_t *dfa,
+                       unsigned char ch,
+                       re_dfastate_t *state)
+{
+  re_dfastate_t **trtable;
+
+  /* Use transition table  */
+  for (;;)
+  {
+      trtable = state->trtable;
+      if (BE (trtable != NULL, 1))
+          return trtable[ch];
+
+      if (!build_trtable (dfa, state))
+      {
+          *err = REG_ESPACE;
+          return NULL;
+      }
+
+      /* Retry, we now have a transition table.  */
+  }
+}
 /* Update the state_log if we need */
 re_dfastate_t *
 internal_function



More information about the Libc-help mailing list