[PATCH] PR25756 more debuginfod metrics

Frank Ch. Eigler fche@redhat.com
Wed Oct 21 00:44:59 GMT 2020


Subject: [PATCH 1/2] PR26756: add more prometheus metrics to debuginfod

From: Frank Ch. Eigler <fche@redhat.com>

Add an error_count{} family of metrics for each libc/libarchive/http
exception instance created during operation.  Add a family of fdcache*
metrics for tracking fdcache operations and status.  Test via a
injecting a permission-000 empty nothing.rpm in the testsuite.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
---
 debuginfod/ChangeLog         |  6 ++++
 debuginfod/debuginfod.cxx    | 56 +++++++++++++++++++++++++++---------
 tests/ChangeLog              |  6 ++++
 tests/run-debuginfod-find.sh | 12 ++++++--
 4 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 8cb89967e9d1..236e2ca3270f 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26756: more prometheus metrics
+	* debuginfod.cxx (*_exception): Add counters for error occurrences.
+	(fdcache::*): Add counters for fdcache operations and status.
+
 2020-09-18  Frank Ch. Eigler <fche@redhat.com>
 
 	* debuginfod.cxx (scan_source_file, archive_classify): Store only
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 140b7789de3b..a5334d2a7fc5 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception
 struct libc_exception: public reportable_exception
 {
   libc_exception(int rc, const string& msg):
-    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+    inc_metric("error_count","libc",strerror(rc));
+  }
 };
 
 
 struct archive_exception: public reportable_exception
 {
   archive_exception(const string& msg):
-    reportable_exception(string("libarchive error: ") + msg) {}
+    reportable_exception(string("libarchive error: ") + msg) {
+      inc_metric("error_count","libarchive",msg);
+  }
   archive_exception(struct archive* a, const string& msg):
-    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+    inc_metric("error_count","libarchive",msg);
+  }
 };
 
 
 struct elfutils_exception: public reportable_exception
 {
   elfutils_exception(int rc, const string& msg):
-    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+    inc_metric("error_count","elfutils",elf_errmsg(rc));
+  }
 };
 
 
@@ -1083,6 +1091,15 @@ class libarchive_fdcache
   long max_mbs;
 
 public:
+  void set_metrics()
+  {
+    double total_mb = 0.0;
+    for (auto i = lru.begin(); i < lru.end(); i++)
+      total_mb += i->fd_size_mb;
+    set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+    set_metric("fdcache_count", lru.size());
+  }
+
   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
@@ -1093,19 +1110,24 @@ class libarchive_fdcache
             {
               unlink (i->fd.c_str());
               lru.erase(i);
+              inc_metric("fdcache_op_count","op","dequeue");
               break; // must not continue iterating
             }
         }
       double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
-      if (front_p)
+      if (front_p) {
+        inc_metric("fdcache_op_count","op","enqueue_front");
         lru.push_front(n);
-      else
+      } else {
+        inc_metric("fdcache_op_count","op","enqueue_back");
         lru.push_back(n);
-    if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b
-                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+      }
+      if (verbose > 3)
+        obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                       << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
+    set_metrics();
 
     // NB: we age the cache at lookup time too
     if (front_p)
@@ -1124,7 +1146,7 @@ class libarchive_fdcache
               fdcache_entry n = *i;
               lru.erase(i); // invalidates i, so no more iteration!
               lru.push_front(n);
-
+              inc_metric("fdcache_op_count","op","requeue_front");
               fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
               break;
             }
@@ -1142,9 +1164,12 @@ class libarchive_fdcache
     unique_lock<mutex> lock(fdcache_lock);
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
-        if (i->archive == a && i->entry == b)
+        if (i->archive == a && i->entry == b) {
+          inc_metric("fdcache_op_count","op","probe_hit");
           return true;
+        }
       }
+    inc_metric("fdcache_op_count","op","probe_miss");
     return false;
   }
 
@@ -1157,7 +1182,9 @@ class libarchive_fdcache
           { // found it; move it to head of lru
             fdcache_entry n = *i;
             lru.erase(i); // invalidates i, so no more iteration!
+            inc_metric("fdcache_op_count","op","clear");
             unlink (n.fd.c_str());
+            set_metrics();
             return;
           }
       }
@@ -1188,14 +1215,15 @@ class libarchive_fdcache
                 if (verbose > 3)
                   obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
                                  << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+                inc_metric("fdcache_op_count","op","evict");
                 unlink (j->fd.c_str());
               }
 
             lru.erase(i, lru.end()); // erase the nodes generally
             break;
           }
-
       }
+    set_metrics();
   }
 
   ~libarchive_fdcache()
@@ -2263,7 +2291,7 @@ scan_source_file (const string& rps, const stat_t& st,
               if (verbose > 3)
                 obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
             }
-          
+
           ps_upsert_files
             .reset()
             .bind(1, dwarfsrc_canon)
@@ -2435,7 +2463,7 @@ archive_classify (const string& rps, string& archive_extension,
                       if (verbose > 3)
                         obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
                     }
-                  
+
                   ps_upsert_files
                     .reset()
                     .bind(1, dwarfsrc_canon)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 5a8b5899b9c5..aa68ffd383e8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26756: more prometheus metrics
+	* run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
+	and check for new metrics.
+
 2020-09-18  Mark Wielaard  <mark@klomp.org>
 
 	* run-readelf-compressed-zstd.sh: New test.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 730bb0e10b47..79976f70dc92 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -95,6 +95,10 @@ wait_ready()
   fi
 }
 
+# create a 000 empty .rpm file to evoke a metric-visible error
+touch R/nothing.rpm
+chmod 000 R/nothing.rpm
+
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
 PID1=$!
 tempfiles vlog4
@@ -240,8 +244,8 @@ fi
 
 cp -rvp ${abs_srcdir}/debuginfod-tars Z
 kill -USR1 $PID1
-# All rpms need to be in the index
-rpms=$(find R -name \*rpm | wc -l)
+# All rpms need to be in the index, except the dummy permission-000 one
+rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
 wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms
 txz=$(find Z -name \*tar.xz | wc -l)
 wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz
@@ -255,7 +259,7 @@ kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr
 mkdir extracted
 cd extracted
 subdir=0;
-newrpms=$(find ../R -name \*\.rpm)
+newrpms=$(find ../R -name \*\.rpm | grep -v nothing)
 for i in $newrpms; do
     subdir=$[$subdir+1];
     mkdir $subdir;
@@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum'
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count'
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count'
 
 # And generate a few errors into the second debuginfod's logs, for analysis just below
 curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true
-- 
2.26.2



More information about the Elfutils-devel mailing list