[PATCH v2] [gdb] Add spell check pre-commit hook

Tom de Vries tdevries@suse.de
Tue Nov 12 13:06:32 GMT 2024


Doing gdb/contrib/spellcheck.sh --check can be slow on large files.

[ The assumption below is that .git/wikipedia-common-misspellings.txt has been
downloaded and that .git/spell-check.pat1.$md5sum has been generated. ]

Say we add the typo "upto" to the first line of gdb/doc/gdb.texinfo; we then
get a quick result:
...
$ time ./gdb/contrib/spellcheck.sh --check gdb/doc/gdb.texinfo

real    0m0,155s
user    0m0,012s
sys     0m0,029s
...
but if we add the typo to the last line instead, we get:
...
$ time ./gdb/contrib/spellcheck.sh --check gdb/doc/gdb.texinfo

real    0m9,234s
user    0m0,013s
sys     0m0,028s
...
and we get the same slow result if there's no typo at all.

Add a --staged option to gdb/contrib/spellcheck.sh that doesn't look at the
entire file, but only at the lines added by the staged changes:
...
$ git add gdb/doc/gdb.texinfo
$ time ./gdb/contrib/spellcheck.sh --staged gdb/doc/gdb.texinfo
Spell check failed in /home/vries/gdb/binutils-gdb.git/gdb/doc/gdb.texinfo

real    0m0,218s
user    0m0,040s
sys     0m0,031s
...
which speeds things up a bit.

Now that it's no longer unreasonably slow, add a call to
gdb/contrib/spellcheck.sh --staged in .pre-commit-config.yaml.

Tested on aarch64-linux.  Verified with shellcheck.
---
 .pre-commit-config.yaml   |  7 +++++++
 gdb/contrib/spellcheck.sh | 44 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 87726aeb758..6be4328f8c1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,3 +22,10 @@ repos:
     - id: isort
       types_or: [file]
       files: 'gdb/.*\.py(\.in)?$'
+  - repo: local
+    hooks:
+    - id: spell-check
+      name: spell check
+      language: script
+      entry: ./gdb/contrib/spellcheck.sh --staged
+      files: ^(gdb|gdbsupport|gdbserver)/
diff --git a/gdb/contrib/spellcheck.sh b/gdb/contrib/spellcheck.sh
index 1b3e88e259b..61ef15ef83a 100755
--- a/gdb/contrib/spellcheck.sh
+++ b/gdb/contrib/spellcheck.sh
@@ -121,6 +121,10 @@ parse_args ()
 		check=true
 		shift
 		;;
+	    " --staged ")
+		staged=true
+		shift
+		;;
 	    *)
 		break
 		;;
@@ -357,6 +361,7 @@ main ()
 {
     declare -a unique_files
     check=false
+    staged=false
     parse_args "$@"
 
     get_dictionary
@@ -365,9 +370,46 @@ main ()
     declare -a replacements
     parse_dictionary
 
-    # Reduce set of files for sed to operate on.
     local files_matching_words
     declare -a files_matching_words
+
+    if $staged; then
+	declare -a tmpfiles
+	tmpfiles=()
+	trap 'rm -f "${tmpfiles[@]}"' EXIT
+
+	local orgfiles
+	declare -A orgfiles
+
+	for f in "${unique_files[@]}"; do
+	    local tmp
+	    tmp=$(mktemp)
+	    tmpfiles=("${tmpfiles[@]}" "$tmp")
+	    orgfiles[$tmp]="$f"
+
+	    git diff --staged --minimal "$f" \
+		| tail -n +5 \
+		| grep -E "^\+" \
+		| sed 's/^\+//' \
+		      > "$tmp"
+	done
+
+	mapfile -t files_matching_words \
+		< <(find_files_matching_words "${tmpfiles[@]}")
+
+	if [ ${#files_matching_words[@]} -eq 0 ]; then
+	    ret=0
+	else
+	    for f in "${files_matching_words[@]}"; do
+		echo "Spell check failed in ${orgfiles[$f]}"
+	    done
+	    ret=1
+	fi
+
+	exit $ret
+    fi
+
+    # Reduce set of files for sed to operate on.
     mapfile -t files_matching_words \
 	    < <(find_files_matching_words "${unique_files[@]}")
 

base-commit: 3d6f66d8bb6c539bc89b55b1add85dc6f53f2011
-- 
2.35.3



More information about the Gdb-patches mailing list