PATCH: debuginfod, db size reduction

Frank Ch. Eigler fche@elastic.org
Fri Sep 18 17:41:49 GMT 2020


From 04bcab70c7dcb9c1bc7ca49508b5d7cbd5aeaa1a Mon Sep 17 00:00:00 2001
From: "Frank Ch. Eigler git" <fche@elastic.org>
Date: Fri, 18 Sep 2020 13:03:01 -0400
Subject: [PATCH] debuginfod: store only canonicalized sref pathnames in
 database

From: Frank Ch. Eigler <fche@redhat.com>

Since PR25548, we let debuginfod answer /buildid/HEX/source/PATH
queries with both canonicalized and raw PATHs.  It canonicalizes
incoming paths, but until now it also stored the raw paths in the
database alongside the canonicalized ones.  This near-duplicate
storage is not needed, since queries always find the canonicalized
version, so stop storing the raw paths.  This saves database space
and time.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
---
 debuginfod/ChangeLog      |  5 +++
 debuginfod/debuginfod.cxx | 69 +++++++++++++--------------------------
 2 files changed, 28 insertions(+), 46 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index a8e0ac5ecf0f..8cb89967e9d1 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,8 @@
+2020-09-18  Frank Ch. Eigler <fche@redhat.com>
+
+	* debuginfod.cxx (scan_source_file, archive_classify): Store only
+	canonicalized file names in sdef & sref records in the database.
+
 2020-09-08  Mark Wielaard  <mark@klomp.org>
 
 	* Makefile.am (BUILD_STATIC): Include libcurl_LIBS in libdebuginfod
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 5621030292e8..140b7789de3b 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -1505,6 +1505,8 @@ handle_buildid (MHD_Connection* conn,
                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
       pp->reset();
       pp->bind(1, buildid);
+      // NB: we don't store the non-canonicalized path names any more, but old databases
+      // might have them (and no canon ones), so we keep searching for both.
       pp->bind(2, suffix);
       pp->bind(3, canon_pathname(suffix));
     }
@@ -2254,41 +2256,27 @@ scan_source_file (const string& rps, const stat_t& st,
             .bind(1, srps)
             .step_ok_done();
 
-          // register the dwarfsrc name in the interning table too
+          // PR25548: store canonicalized dwarfsrc path
+          string dwarfsrc_canon = canon_pathname (dwarfsrc);
+          if (dwarfsrc_canon != dwarfsrc)
+            {
+              if (verbose > 3)
+                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+            }
+          
           ps_upsert_files
             .reset()
-            .bind(1, dwarfsrc)
+            .bind(1, dwarfsrc_canon)
             .step_ok_done();
 
           ps_upsert_s
             .reset()
             .bind(1, buildid)
-            .bind(2, dwarfsrc)
+            .bind(2, dwarfsrc_canon)
             .bind(3, srps)
             .bind(4, sfs.st_mtime)
             .step_ok_done();
 
-          // PR25548: also store canonicalized source path
-          string dwarfsrc_canon = canon_pathname (dwarfsrc);
-          if (dwarfsrc_canon != dwarfsrc)
-            {
-              if (verbose > 3)
-                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
-
-              ps_upsert_files
-                .reset()
-                .bind(1, dwarfsrc_canon)
-                .step_ok_done();
-
-              ps_upsert_s
-                .reset()
-                .bind(1, buildid)
-                .bind(2, dwarfsrc_canon)
-                .bind(3, srps)
-                .bind(4, sfs.st_mtime)
-                .step_ok_done();
-            }
-
           inc_metric("found_sourcerefs_total","source","files");
         }
     }
@@ -2439,37 +2427,26 @@ archive_classify (const string& rps, string& archive_extension,
                       continue;
                     }
 
+                  // PR25548: store canonicalized source path
+                  const string& dwarfsrc = s;
+                  string dwarfsrc_canon = canon_pathname (dwarfsrc);
+                  if (dwarfsrc_canon != dwarfsrc)
+                    {
+                      if (verbose > 3)
+                        obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+                    }
+                  
                   ps_upsert_files
                     .reset()
-                    .bind(1, s)
+                    .bind(1, dwarfsrc_canon)
                     .step_ok_done();
 
                   ps_upsert_sref
                     .reset()
                     .bind(1, buildid)
-                    .bind(2, s)
+                    .bind(2, dwarfsrc_canon)
                     .step_ok_done();
 
-                  // PR25548: also store canonicalized source path
-                  const string& dwarfsrc = s;
-                  string dwarfsrc_canon = canon_pathname (dwarfsrc);
-                  if (dwarfsrc_canon != dwarfsrc)
-                    {
-                      if (verbose > 3)
-                        obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
-
-                      ps_upsert_files
-                        .reset()
-                        .bind(1, dwarfsrc_canon)
-                        .step_ok_done();
-
-                      ps_upsert_sref
-                        .reset()
-                        .bind(1, buildid)
-                        .bind(2, dwarfsrc_canon)
-                        .step_ok_done();
-                    }
-
                   fts_sref ++;
                 }
             }
-- 
2.26.2



More information about the Elfutils-devel mailing list