Pascal and case sensitivity

Jan Kratochvil jan.kratochvil@redhat.com
Mon Jan 10 20:36:00 GMT 2011


Hi Joost,

On Mon, 10 Jan 2011 21:00:25 +0100, Joost van der Sluis wrote:
> Attached is patch to fix case-sensitivity issues with Pascal. It first
> searches case-sensitive an if an identifier isn't found, a
> case-insensitive search is performed.

without commenting on your patch I had one with a similar goal here.

Just this stuff depends on full symtabs expansion and I found a regression
already in FSF GDB:
	http://sourceware.org/ml/gdb-patches/2010-11/msg00300.html
	http://sourceware.org/ml/gdb-patches/2010-11/msg00300.html
which is
	http://sourceware.org/bugzilla/show_bug.cgi?id=11734
with the fix currently pending unreviewed:
	Re: [RFA] c++/11734 revisited
	http://sourceware.org/ml/gdb-patches/2010-12/msg00263.html

Also .gdb_index may need updates for it but before this basic full symtabs
expansion gets resolved it does not make sense.


Regards,
Jan
-------------- next part --------------
--- a/gdb/dictionary.c
+++ b/gdb/dictionary.c
@@ -836,7 +836,7 @@ dict_hash (const char *string0)
 	    }
 	  /* FALL THROUGH */
 	default:
-	  hash = hash * 67 + *string - 113;
+	  hash = SYMBOL_HASH_NEXT (hash, *string);
 	  string += 1;
 	  break;
 	}
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -71,6 +71,7 @@
 #define MAP_FAILED ((void *) -1)
 #endif
 #endif
+#include <ctype.h>
 
 typedef struct symbol *symbolp;
 DEF_VEC_P (symbolp);
@@ -148,6 +149,8 @@ DEF_VEC_I (offset_type);
    a comment by the code that writes the index.  */
 struct mapped_index
 {
+  /* Index data format version.  */
+  int version;
   /* The total length of the buffer.  */
   off_t total_size;
   /* A pointer to the address table data.  */
@@ -1853,21 +1856,26 @@ create_addrmap_from_index (struct objfile *objfile, struct mapped_index *index)
   do_cleanups (cleanup);
 }
 
-/* The hash function for strings in the mapped index.  This is the
-   same as the hashtab.c hash function, but we keep a separate copy to
-   maintain control over the implementation.  This is necessary
-   because the hash function is tied to the format of the mapped index
-   file.  */
+/* The hash function for strings in the mapped index.  This is the same as
+   SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the
+   implementation.  This is necessary because the hash function is tied to the
+   format of the mapped index file.
+   
+   Use INT_MAX for INDEX_VERSION if you generate the current index format.  */
 
 static hashval_t
-mapped_index_string_hash (const void *p)
+mapped_index_string_hash (int index_version, const void *p)
 {
   const unsigned char *str = (const unsigned char *) p;
   hashval_t r = 0;
   unsigned char c;
 
   while ((c = *str++) != 0)
-    r = r * 67 + c - 113;
+    {
+      if (index_version >= 4)
+	c = tolower (c);
+      r = r * 67 + c - 113;
+    }
 
   return r;
 }
@@ -1880,7 +1888,7 @@ static int
 find_slot_in_mapped_hash (struct mapped_index *index, const char *name,
 			  offset_type **vec_out)
 {
-  offset_type hash = mapped_index_string_hash (name);
+  offset_type hash = mapped_index_string_hash (index->version, name);
   offset_type slot, step;
 
   slot = hash & (index->symbol_table_slots - 1);
@@ -1895,7 +1903,7 @@ find_slot_in_mapped_hash (struct mapped_index *index, const char *name,
 	return 0;
 
       str = index->constant_pool + MAYBE_SWAP (index->symbol_table[i]);
-      if (!strcmp (name, str))
+      if (!strcmp_iw (str, name))
 	{
 	  *vec_out = (offset_type *) (index->constant_pool
 				      + MAYBE_SWAP (index->symbol_table[i + 1]));
@@ -1941,8 +1949,13 @@ dwarf2_read_index (struct objfile *objfile)
      it seems better to just ignore such indices.  */
   if (version < 3)
     return 0;
+  /* Indexes with higher version than the one supported by GDB may be no
+     longer backward compatible.  */
+  if (version > 4)
+    return 0;
 
   map = OBSTACK_ZALLOC (&objfile->objfile_obstack, struct mapped_index);
+  map->version = version;
   map->total_size = dwarf2_per_objfile->gdb_index.size;
 
   metadata = (offset_type *) (addr + sizeof (offset_type));
@@ -14800,13 +14813,16 @@ struct strtab_entry
   const char *str;
 };
 
-/* Hash function for a strtab_entry.  */
+/* Hash function for a strtab_entry.
+
+   Function is used only during write_hash_table so no index format backward
+   compatibility is needed.  */
 
 static hashval_t
 hash_strtab_entry (const void *e)
 {
   const struct strtab_entry *entry = e;
-  return mapped_index_string_hash (entry->str);
+  return mapped_index_string_hash (INT_MAX, entry->str);
 }
 
 /* Equality function for a strtab_entry.  */
@@ -14944,12 +14960,15 @@ cleanup_mapped_symtab (void *p)
 }
 
 /* Find a slot in SYMTAB for the symbol NAME.  Returns a pointer to
-   the slot.  */
+   the slot.
+   
+   Function is used only during write_hash_table so no index format backward
+   compatibility is needed.  */
 
 static struct symtab_index_entry **
 find_slot (struct mapped_symtab *symtab, const char *name)
 {
-  offset_type index, step, hash = mapped_index_string_hash (name);
+  offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
 
   index = hash & (symtab->size - 1);
   step = ((hash * 17) & (symtab->size - 1)) | 1;
@@ -15349,7 +15368,7 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
   total_len = size_of_contents;
 
   /* The version number.  */
-  val = MAYBE_SWAP (3);
+  val = MAYBE_SWAP (4);
   obstack_grow (&contents, &val, sizeof (val));
 
   /* The offset of the CU list from the start of the file.  */
@@ -15407,8 +15426,8 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
    1. The file header.  This is a sequence of values, of offset_type
    unless otherwise noted:
 
-   [0] The version number, currently 3.  Versions 1 and 2 are
-   obsolete.
+   [0] The version number, currently 4.  Versions 1 and 2 are
+   obsolete.  Version 3 differs by its hashing function.
    [1] The offset, from the start of the file, of the CU list.
    [2] The offset, from the start of the file, of the types CU list.
    Note that this section can be empty, in which case this offset will
@@ -15440,7 +15459,8 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
 
    5. The symbol table.  This is a hash table.  The size of the hash
    table is always a power of 2.  The initial hash and the step are
-   currently defined by the `find_slot' function.
+   currently defined by the `find_slot' function.  Index version 3 uses
+   a different hash function than index version 4 and later.
 
    Each slot in the hash table consists of a pair of offset_type
    values.  The first value is the offset of the symbol's name in the
--- a/gdb/minsyms.c
+++ b/gdb/minsyms.c
@@ -89,7 +89,7 @@ msymbol_hash_iw (const char *string)
 	++string;
       if (*string && *string != '(')
 	{
-	  hash = hash * 67 + *string - 113;
+	  hash = SYMBOL_HASH_NEXT (hash, *string);
 	  ++string;
 	}
     }
@@ -104,7 +104,7 @@ msymbol_hash (const char *string)
   unsigned int hash = 0;
 
   for (; *string; ++string)
-    hash = hash * 67 + *string - 113;
+    hash = SYMBOL_HASH_NEXT (hash, *string);
   return hash;
 }
 
@@ -244,6 +244,9 @@ lookup_minimal_symbol (const char *name, const char *sfile,
 		    {
 		      match = strcmp (SYMBOL_LINKAGE_NAME (msymbol),
 				      modified_name) == 0;
+		      if (!match && case_sensitivity == case_sensitive_off)
+			match = strcasecmp (SYMBOL_LINKAGE_NAME (msymbol),
+					    modified_name) == 0;
 		    }
 		  else
 		    {
--- a/gdb/psymtab.c
+++ b/gdb/psymtab.c
@@ -577,8 +577,15 @@ lookup_partial_symbol (struct partial_symtab *pst, const char *name,
       if (!(top == bottom))
 	internal_error (__FILE__, __LINE__, _("failed internal consistency check"));
 
-      while (top <= real_top
-	     && SYMBOL_MATCHES_SEARCH_NAME (*top, name))
+      /* For `case_sensitivity == case_sensitive_off' strcmp_iw_ordered will
+	 search more exactly than what matches SYMBOL_MATCHES_SEARCH_NAME.  */
+      while (top >= start && SYMBOL_MATCHES_SEARCH_NAME (*top, name))
+	top--;
+
+      /* Fixup to have a symbol which matches SYMBOL_MATCHES_SEARCH_NAME.  */
+      top++;
+
+      while (top <= real_top && SYMBOL_MATCHES_SEARCH_NAME (*top, name))
 	{
 	  if (symbol_matches_domain (SYMBOL_LANGUAGE (*top),
 				     SYMBOL_DOMAIN (*top), domain))
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -1061,19 +1061,6 @@ lookup_symbol_in_language (const char *name, const struct block *block,
 	}
     }
 
-  if (case_sensitivity == case_sensitive_off)
-    {
-      char *copy;
-      int len, i;
-
-      len = strlen (name);
-      copy = (char *) alloca (len + 1);
-      for (i= 0; i < len; i++)
-        copy[i] = tolower (name[i]);
-      copy[len] = 0;
-      modified_name = copy;
-    }
-
   returnval = lookup_symbol_aux (modified_name, block, domain, lang,
 				 is_a_field_of_this);
   do_cleanups (cleanup);
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -1012,6 +1012,13 @@ extern unsigned int msymbol_hash_iw (const char *);
 
 extern unsigned int msymbol_hash (const char *);
 
+/* This is only an internally computed value with no external files
+   compatibility requirements.  Compute next hash value from string
+   character C.  */
+
+#define SYMBOL_HASH_NEXT(hash, c) \
+  ((hash) * 67 + tolower ((unsigned char) (c)) - 113)
+
 extern struct objfile * msymbol_objfile (struct minimal_symbol *sym);
 
 extern void
--- /dev/null
+++ b/gdb/testsuite/gdb.base/fortran-sym-case.c
@@ -0,0 +1,22 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+int
+main (int argc, char **aRGv)
+{
+  return 0;
+}
--- /dev/null
+++ b/gdb/testsuite/gdb.base/fortran-sym-case.exp
@@ -0,0 +1,27 @@
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+set testfile fortran-sym-case
+if { [prepare_for_testing ${testfile}.exp ${testfile}] } {
+    return -1
+}
+
+if ![runto_main] {
+    return -1
+}
+
+gdb_test "set language fortran" {Warning: the current language does not match this frame\.}
+
+gdb_test "frame" "aRGv=0x.*"
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/dw2-case-insensitive-debug.S
@@ -0,0 +1,78 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+	.section .debug_info
+.Lcu1_begin:
+	/* CU header */
+	.4byte	.Lcu1_end - .Lcu1_start		/* Length of Compilation Unit */
+.Lcu1_start:
+	.2byte	2				/* DWARF Version */
+	.4byte	.Labbrev1_begin			/* Offset into abbrev section */
+	.byte	4				/* Pointer size */
+
+	/* CU die */
+	.uleb128 1				/* Abbrev: DW_TAG_compile_unit */
+	.ascii	"file1.txt\0"			/* DW_AT_name */
+	.ascii	"GNU C 3.3.3\0"			/* DW_AT_producer */
+	.byte	8				/* DW_AT_language (DW_LANG_Fortran90) */
+	.4byte		FUNC_lang		/* DW_AT_low_pc */
+	.4byte		main			/* DW_AT_high_pc */
+
+	.uleb128	3			/* Abbrev: DW_TAG_subprogram */
+	.byte		1			/* DW_AT_external */
+	.ascii		"FUNC_lang\0"		/* DW_AT_name */
+	.4byte		FUNC_lang		/* DW_AT_low_pc */
+	.4byte		main			/* DW_AT_high_pc */
+
+	.byte		0			/* End of children of CU */
+.Lcu1_end:
+
+/* Abbrev table */
+	.section .debug_abbrev
+.Labbrev1_begin:
+	.uleb128	1			/* Abbrev code */
+	.uleb128	0x11			/* DW_TAG_compile_unit */
+	.byte		1			/* has_children */
+	.uleb128	0x3			/* DW_AT_name */
+	.uleb128	0x8			/* DW_FORM_string */
+	.uleb128	0x25			/* DW_AT_producer */
+	.uleb128	0x8			/* DW_FORM_string */
+	.uleb128	0x13			/* DW_AT_language */
+	.uleb128	0xb			/* DW_FORM_data1 */
+	.uleb128	0x11			/* DW_AT_low_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.uleb128	0x12			/* DW_AT_high_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.byte		0x0			/* Terminator */
+	.byte		0x0			/* Terminator */
+
+	.uleb128	3			/* Abbrev code */
+	.uleb128	0x2e			/* DW_TAG_subprogram */
+	.byte		0			/* has_children */
+	.uleb128	0x3f			/* DW_AT_external */
+	.uleb128	0xc			/* DW_FORM_flag */
+	.uleb128	0x3			/* DW_AT_name */
+	.uleb128	0x8			/* DW_FORM_string */
+	.uleb128	0x11			/* DW_AT_low_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.uleb128	0x12			/* DW_AT_high_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.byte		0x0			/* Terminator */
+	.byte		0x0			/* Terminator */
+
+	.byte		0x0			/* Terminator */
+	.byte		0x0			/* Terminator */
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/dw2-case-insensitive.c
@@ -0,0 +1,38 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Use DW_LANG_Fortran90 for case insensitive DWARF.  */
+
+void
+FUNC_lang (void)
+{
+}
+
+/* Symbol is present only in ELF .symtab.  */
+
+void
+FUNC_symtab (void)
+{
+}
+
+int
+main()
+{
+  FUNC_lang ();
+  FUNC_symtab ();
+  return 0;
+}
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/dw2-case-insensitive.exp
@@ -0,0 +1,41 @@
+# Copyright 2010 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This test can only be run on targets which support DWARF-2 and use gas.
+# For now pick a sampling of likely targets.
+if {![istarget *-*-linux*]
+    && ![istarget *-*-gnu*]
+    && ![istarget *-*-elf*]
+    && ![istarget *-*-openbsd*]
+    && ![istarget arm-*-eabi*]
+    && ![istarget powerpc-*-eabi*]} {
+    return 0
+}
+
+set testfile "dw2-case-insensitive"
+
+if { [prepare_for_testing ${testfile}.exp ${testfile} [list ${testfile}.c ${testfile}-debug.S] {nodebug}] } {
+    return -1
+}
+
+gdb_test_no_output "set language fortran"
+
+if {[gdb_breakpoint "fuNC_lang"] == 1} {
+    pass "setting breakpoint at fuNC_lang"
+}
+
+if {[gdb_breakpoint "fuNC_symtab"] == 1} {
+    pass "setting breakpoint at fuNC_symtab"
+}
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -2927,10 +2927,12 @@ strcmp_iw (const char *string1, const char *string2)
 	{
 	  string2++;
 	}
-      if (*string1 != *string2)
-	{
-	  break;
-	}
+      if (case_sensitivity == case_sensitive_on && *string1 != *string2)
+	break;
+      if (case_sensitivity == case_sensitive_off
+	  && (tolower ((unsigned char) *string1)
+	      != tolower ((unsigned char) *string2)))
+	break;
       if (*string1 != '\0')
 	{
 	  string1++;
@@ -2986,10 +2988,12 @@ strcmp_iw_ordered (const char *string1, const char *string2)
 	{
 	  string2++;
 	}
-      if (*string1 != *string2)
-	{
-	  break;
-	}
+      if (case_sensitivity == case_sensitive_on && *string1 != *string2)
+	break;
+      if (case_sensitivity == case_sensitive_off
+	  && (tolower ((unsigned char) *string1)
+	      != tolower ((unsigned char) *string2)))
+	break;
       if (*string1 != '\0')
 	{
 	  string1++;
@@ -3015,8 +3019,11 @@ strcmp_iw_ordered (const char *string1, const char *string2)
     default:
       if (*string2 == '(')
 	return 1;
-      else
+      else if (case_sensitivity == case_sensitive_on)
 	return *string1 - *string2;
+      else
+	return (tolower ((unsigned char) *string1)
+		- tolower ((unsigned char) *string2));
     }
 }
 


More information about the Gdb mailing list