x86: Update large memcpy case in memmove-vec-unaligned-erms.S

author noah <goldstein.w.n@gmail.com>

Sat, 3 Apr 2021 08:12:15 +0000 (04:12 -0400)

committer H.J. Lu <hjl.tools@gmail.com>

Fri, 16 Apr 2021 17:06:56 +0000 (10:06 -0700)
author noah <goldstein.w.n@gmail.com>
Sat, 3 Apr 2021 08:12:15 +0000 (04:12 -0400)
committer H.J. Lu <hjl.tools@gmail.com>
Fri, 16 Apr 2021 17:06:56 +0000 (10:06 -0700)
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S

index 897a3d976275b7bf3cd7b76d3ffaef2bd43ece59..5e4a071f16264e77a99792323d59a34b11bb9626 100644 (file)
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -35,7 +35,16 @@
        __x86_rep_movsb_stop_threshold, then REP MOVSB will be used.
     7. If size >= __x86_shared_non_temporal_threshold and there is no
        overlap between destination and source, use non-temporal store
-      instead of aligned store.  */
+      instead of aligned store copying from either 2 or 4 pages at
+      once.
+   8. For point 7) if size < 16 * __x86_shared_non_temporal_threshold
+      and source and destination do not page alias, copy from 2 pages
+      at once using non-temporal stores. Page aliasing in this case is
+      considered true if destination's page alignment - sources' page
+      alignment is less than 8 * VEC_SIZE.
+   9. If size >= 16 * __x86_shared_non_temporal_threshold or source
+      and destination do page alias copy from 4 pages at once using
+      non-temporal stores.  */
  
  #include <sysdep.h>
  
@@ -67,6 +76,34 @@
  # endif
  #endif
  
+#ifndef PAGE_SIZE
+# define PAGE_SIZE 4096
+#endif
+
+#if PAGE_SIZE != 4096
+# error Unsupported PAGE_SIZE
+#endif
+
+#ifndef LOG_PAGE_SIZE
+# define LOG_PAGE_SIZE 12
+#endif
+
+#if PAGE_SIZE != (1 << LOG_PAGE_SIZE)
+# error Invalid LOG_PAGE_SIZE
+#endif
+
+/* Byte per page for large_memcpy inner loop.  */
+#if VEC_SIZE == 64
+# define LARGE_LOAD_SIZE (VEC_SIZE * 2)
+#else
+# define LARGE_LOAD_SIZE (VEC_SIZE * 4)
+#endif
+
+/* Amount to shift rdx by to compare for memcpy_large_4x.  */
+#ifndef LOG_4X_MEMCPY_THRESH
+# define LOG_4X_MEMCPY_THRESH 4
+#endif
+
  /* Avoid short distance rep movsb only with non-SSE vector.  */
  #ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
  # define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
@@ -106,6 +143,28 @@
  # error Unsupported PREFETCH_SIZE!
  #endif
  
+#if LARGE_LOAD_SIZE == (VEC_SIZE * 2)
+# define LOAD_ONE_SET(base, offset, vec0, vec1, ...) \
+       VMOVU   (offset)base, vec0; \
+       VMOVU   ((offset) + VEC_SIZE)base, vec1;
+# define STORE_ONE_SET(base, offset, vec0, vec1, ...) \
+       VMOVNT  vec0, (offset)base; \
+       VMOVNT  vec1, ((offset) + VEC_SIZE)base;
+#elif LARGE_LOAD_SIZE == (VEC_SIZE * 4)
+# define LOAD_ONE_SET(base, offset, vec0, vec1, vec2, vec3) \
+       VMOVU   (offset)base, vec0; \
+       VMOVU   ((offset) + VEC_SIZE)base, vec1; \
+       VMOVU   ((offset) + VEC_SIZE * 2)base, vec2; \
+       VMOVU   ((offset) + VEC_SIZE * 3)base, vec3;
+# define STORE_ONE_SET(base, offset, vec0, vec1, vec2, vec3) \
+       VMOVNT  vec0, (offset)base; \
+       VMOVNT  vec1, ((offset) + VEC_SIZE)base; \
+       VMOVNT  vec2, ((offset) + VEC_SIZE * 2)base; \
+       VMOVNT  vec3, ((offset) + VEC_SIZE * 3)base;
+#else
+# error Invalid LARGE_LOAD_SIZE
+#endif
+
  #ifndef SECTION
  # error SECTION is not defined!
  #endif
@@ -393,6 +452,15 @@ L(last_4x_vec):
         VZEROUPPER_RETURN
  
  L(more_8x_vec):
+       /* Check if non-temporal move candidate.  */
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+       /* Check non-temporal store threshold.  */
+       cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
+       ja      L(large_memcpy_2x)
+#endif
+       /* Entry if rdx is greater than non-temporal threshold but there
+       is overlap.  */
+L(more_8x_vec_check):
         cmpq    %rsi, %rdi
         ja      L(more_8x_vec_backward)
         /* Source == destination is less common.  */
@@ -419,24 +487,21 @@ L(more_8x_vec):
         subq    %r8, %rdi
         /* Adjust length.  */
         addq    %r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
-       /* Check non-temporal store threshold.  */
-       cmp     __x86_shared_non_temporal_threshold(%rip), %RDX_LP
-       ja      L(large_forward)
-#endif
+
+       .p2align 4
  L(loop_4x_vec_forward):
         /* Copy 4 * VEC a time forward.  */
         VMOVU   (%rsi), %VEC(0)
         VMOVU   VEC_SIZE(%rsi), %VEC(1)
         VMOVU   (VEC_SIZE * 2)(%rsi), %VEC(2)
         VMOVU   (VEC_SIZE * 3)(%rsi), %VEC(3)
-       addq    $(VEC_SIZE * 4), %rsi
-       subq    $(VEC_SIZE * 4), %rdx
+       subq    $-(VEC_SIZE * 4), %rsi
+       addq    $-(VEC_SIZE * 4), %rdx
         VMOVA   %VEC(0), (%rdi)
         VMOVA   %VEC(1), VEC_SIZE(%rdi)
         VMOVA   %VEC(2), (VEC_SIZE * 2)(%rdi)
         VMOVA   %VEC(3), (VEC_SIZE * 3)(%rdi)
-       addq    $(VEC_SIZE * 4), %rdi
+       subq    $-(VEC_SIZE * 4), %rdi
         cmpq    $(VEC_SIZE * 4), %rdx
         ja      L(loop_4x_vec_forward)
         /* Store the last 4 * VEC.  */
@@ -470,24 +535,21 @@ L(more_8x_vec_backward):
         subq    %r8, %r9
         /* Adjust length.  */
         subq    %r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
-       /* Check non-temporal store threshold.  */
-       cmp     __x86_shared_non_temporal_threshold(%rip), %RDX_LP
-       ja      L(large_backward)
-#endif
+
+       .p2align 4
  L(loop_4x_vec_backward):
         /* Copy 4 * VEC a time backward.  */
         VMOVU   (%rcx), %VEC(0)
         VMOVU   -VEC_SIZE(%rcx), %VEC(1)
         VMOVU   -(VEC_SIZE * 2)(%rcx), %VEC(2)
         VMOVU   -(VEC_SIZE * 3)(%rcx), %VEC(3)
-       subq    $(VEC_SIZE * 4), %rcx
-       subq    $(VEC_SIZE * 4), %rdx
+       addq    $-(VEC_SIZE * 4), %rcx
+       addq    $-(VEC_SIZE * 4), %rdx
         VMOVA   %VEC(0), (%r9)
         VMOVA   %VEC(1), -VEC_SIZE(%r9)
         VMOVA   %VEC(2), -(VEC_SIZE * 2)(%r9)
         VMOVA   %VEC(3), -(VEC_SIZE * 3)(%r9)
-       subq    $(VEC_SIZE * 4), %r9
+       addq    $-(VEC_SIZE * 4), %r9
         cmpq    $(VEC_SIZE * 4), %rdx
         ja      L(loop_4x_vec_backward)
         /* Store the first 4 * VEC.  */
@@ -500,72 +562,202 @@ L(loop_4x_vec_backward):
         VZEROUPPER_RETURN
  
  #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
-L(large_forward):
+       .p2align 4
+L(large_memcpy_2x):
+       /* Compute absolute value of difference between source and
+          destination.  */
+       movq    %rdi, %r9
+       subq    %rsi, %r9
+       movq    %r9, %r8
+       leaq    -1(%r9), %rcx
+       sarq    $63, %r8
+       xorq    %r8, %r9
+       subq    %r8, %r9
         /* Don't use non-temporal store if there is overlap between
-          destination and source since destination may be in cache
-          when source is loaded.  */
-       leaq    (%rdi, %rdx), %r10
-       cmpq    %r10, %rsi
-       jb      L(loop_4x_vec_forward)
-L(loop_large_forward):
+          destination and source since destination may be in cache when
+          source is loaded.  */
+       cmpq    %r9, %rdx
+       ja      L(more_8x_vec_check)
+
+       /* Cache align destination. First store the first 64 bytes then
+          adjust alignments.  */
+       VMOVU   (%rsi), %VEC(8)
+#if VEC_SIZE < 64
+       VMOVU   VEC_SIZE(%rsi), %VEC(9)
+#if VEC_SIZE < 32
+       VMOVU   (VEC_SIZE * 2)(%rsi), %VEC(10)
+       VMOVU   (VEC_SIZE * 3)(%rsi), %VEC(11)
+#endif
+#endif
+       VMOVU   %VEC(8), (%rdi)
+#if VEC_SIZE < 64
+       VMOVU   %VEC(9), VEC_SIZE(%rdi)
+#if VEC_SIZE < 32
+       VMOVU   %VEC(10), (VEC_SIZE * 2)(%rdi)
+       VMOVU   %VEC(11), (VEC_SIZE * 3)(%rdi)
+#endif
+#endif
+       /* Adjust source, destination, and size.  */
+       movq    %rdi, %r8
+       andq    $63, %r8
+       /* Get the negative of offset for alignment.  */
+       subq    $64, %r8
+       /* Adjust source.  */
+       subq    %r8, %rsi
+       /* Adjust destination which should be aligned now.  */
+       subq    %r8, %rdi
+       /* Adjust length.  */
+       addq    %r8, %rdx
+
+       /* Test if source and destination addresses will alias. If they do
+          the larger pipeline in large_memcpy_4x alleviated the
+          performance drop.  */
+       testl   $(PAGE_SIZE - VEC_SIZE * 8), %ecx
+       jz      L(large_memcpy_4x)
+
+       movq    %rdx, %r10
+       shrq    $LOG_4X_MEMCPY_THRESH, %r10
+       cmp     __x86_shared_non_temporal_threshold(%rip), %r10
+       jae     L(large_memcpy_4x)
+
+       /* edx will store remainder size for copying tail.  */
+       andl    $(PAGE_SIZE * 2 - 1), %edx
+       /* r10 stores outer loop counter.  */
+       shrq    $((LOG_PAGE_SIZE + 1) - LOG_4X_MEMCPY_THRESH), %r10
+       /* Copy 4x VEC at a time from 2 pages.  */
+       .p2align 4
+L(loop_large_memcpy_2x_outer):
+       /* ecx stores inner loop counter.  */
+       movl    $(PAGE_SIZE / LARGE_LOAD_SIZE), %ecx
+L(loop_large_memcpy_2x_inner):
+       PREFETCH_ONE_SET(1, (%rsi), PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET(1, (%rsi), PREFETCHED_LOAD_SIZE * 2)
+       PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE + PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE + PREFETCHED_LOAD_SIZE * 2)
+       /* Load vectors from rsi.  */
+       LOAD_ONE_SET((%rsi), 0, %VEC(0), %VEC(1), %VEC(2), %VEC(3))
+       LOAD_ONE_SET((%rsi), PAGE_SIZE, %VEC(4), %VEC(5), %VEC(6), %VEC(7))
+       subq    $-LARGE_LOAD_SIZE, %rsi
+       /* Non-temporal store vectors to rdi.  */
+       STORE_ONE_SET((%rdi), 0, %VEC(0), %VEC(1), %VEC(2), %VEC(3))
+       STORE_ONE_SET((%rdi), PAGE_SIZE, %VEC(4), %VEC(5), %VEC(6), %VEC(7))
+       subq    $-LARGE_LOAD_SIZE, %rdi
+       decl    %ecx
+       jnz     L(loop_large_memcpy_2x_inner)
+       addq    $PAGE_SIZE, %rdi
+       addq    $PAGE_SIZE, %rsi
+       decq    %r10
+       jne     L(loop_large_memcpy_2x_outer)
+       sfence
+
+       /* Check if only last 4 loads are needed.  */
+       cmpl    $(VEC_SIZE * 4), %edx
+       jbe     L(large_memcpy_2x_end)
+
+       /* Handle the last 2 * PAGE_SIZE bytes.  */
+L(loop_large_memcpy_2x_tail):
         /* Copy 4 * VEC a time forward with non-temporal stores.  */
-       PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 2)
-       PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 3)
+       PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET (1, (%rdi), PREFETCHED_LOAD_SIZE)
         VMOVU   (%rsi), %VEC(0)
         VMOVU   VEC_SIZE(%rsi), %VEC(1)
         VMOVU   (VEC_SIZE * 2)(%rsi), %VEC(2)
         VMOVU   (VEC_SIZE * 3)(%rsi), %VEC(3)
-       addq    $PREFETCHED_LOAD_SIZE, %rsi
-       subq    $PREFETCHED_LOAD_SIZE, %rdx
-       VMOVNT  %VEC(0), (%rdi)
-       VMOVNT  %VEC(1), VEC_SIZE(%rdi)
-       VMOVNT  %VEC(2), (VEC_SIZE * 2)(%rdi)
-       VMOVNT  %VEC(3), (VEC_SIZE * 3)(%rdi)
-       addq    $PREFETCHED_LOAD_SIZE, %rdi
-       cmpq    $PREFETCHED_LOAD_SIZE, %rdx
-       ja      L(loop_large_forward)
-       sfence
+       subq    $-(VEC_SIZE * 4), %rsi
+       addl    $-(VEC_SIZE * 4), %edx
+       VMOVA   %VEC(0), (%rdi)
+       VMOVA   %VEC(1), VEC_SIZE(%rdi)
+       VMOVA   %VEC(2), (VEC_SIZE * 2)(%rdi)
+       VMOVA   %VEC(3), (VEC_SIZE * 3)(%rdi)
+       subq    $-(VEC_SIZE * 4), %rdi
+       cmpl    $(VEC_SIZE * 4), %edx
+       ja      L(loop_large_memcpy_2x_tail)
+
+L(large_memcpy_2x_end):
         /* Store the last 4 * VEC.  */
-       VMOVU   %VEC(5), (%rcx)
-       VMOVU   %VEC(6), -VEC_SIZE(%rcx)
-       VMOVU   %VEC(7), -(VEC_SIZE * 2)(%rcx)
-       VMOVU   %VEC(8), -(VEC_SIZE * 3)(%rcx)
-       /* Store the first VEC.  */
-       VMOVU   %VEC(4), (%r11)
+       VMOVU   -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(0)
+       VMOVU   -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(1)
+       VMOVU   -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(2)
+       VMOVU   -VEC_SIZE(%rsi, %rdx), %VEC(3)
+
+       VMOVU   %VEC(0), -(VEC_SIZE * 4)(%rdi, %rdx)
+       VMOVU   %VEC(1), -(VEC_SIZE * 3)(%rdi, %rdx)
+       VMOVU   %VEC(2), -(VEC_SIZE * 2)(%rdi, %rdx)
+       VMOVU   %VEC(3), -VEC_SIZE(%rdi, %rdx)
         VZEROUPPER_RETURN
  
-L(large_backward):
-       /* Don't use non-temporal store if there is overlap between
-          destination and source since destination may be in cache
-          when source is loaded.  */
-       leaq    (%rcx, %rdx), %r10
-       cmpq    %r10, %r9
-       jb      L(loop_4x_vec_backward)
-L(loop_large_backward):
-       /* Copy 4 * VEC a time backward with non-temporal stores.  */
-       PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 2)
-       PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 3)
-       VMOVU   (%rcx), %VEC(0)
-       VMOVU   -VEC_SIZE(%rcx), %VEC(1)
-       VMOVU   -(VEC_SIZE * 2)(%rcx), %VEC(2)
-       VMOVU   -(VEC_SIZE * 3)(%rcx), %VEC(3)
-       subq    $PREFETCHED_LOAD_SIZE, %rcx
-       subq    $PREFETCHED_LOAD_SIZE, %rdx
-       VMOVNT  %VEC(0), (%r9)
-       VMOVNT  %VEC(1), -VEC_SIZE(%r9)
-       VMOVNT  %VEC(2), -(VEC_SIZE * 2)(%r9)
-       VMOVNT  %VEC(3), -(VEC_SIZE * 3)(%r9)
-       subq    $PREFETCHED_LOAD_SIZE, %r9
-       cmpq    $PREFETCHED_LOAD_SIZE, %rdx
-       ja      L(loop_large_backward)
+       .p2align 4
+L(large_memcpy_4x):
+       movq    %rdx, %r10
+       /* edx will store remainder size for copying tail.  */
+       andl    $(PAGE_SIZE * 4 - 1), %edx
+       /* r10 stores outer loop counter.  */
+       shrq    $(LOG_PAGE_SIZE + 2), %r10
+       /* Copy 4x VEC at a time from 4 pages.  */
+       .p2align 4
+L(loop_large_memcpy_4x_outer):
+       /* ecx stores inner loop counter.  */
+       movl    $(PAGE_SIZE / LARGE_LOAD_SIZE), %ecx
+L(loop_large_memcpy_4x_inner):
+       /* Only one prefetch set per page as doing 4 pages give more time
+          for prefetcher to keep up.  */
+       PREFETCH_ONE_SET(1, (%rsi), PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE + PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE * 2 + PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE * 3 + PREFETCHED_LOAD_SIZE)
+       /* Load vectors from rsi.  */
+       LOAD_ONE_SET((%rsi), 0, %VEC(0), %VEC(1), %VEC(2), %VEC(3))
+       LOAD_ONE_SET((%rsi), PAGE_SIZE, %VEC(4), %VEC(5), %VEC(6), %VEC(7))
+       LOAD_ONE_SET((%rsi), PAGE_SIZE * 2, %VEC(8), %VEC(9), %VEC(10), %VEC(11))
+       LOAD_ONE_SET((%rsi), PAGE_SIZE * 3, %VEC(12), %VEC(13), %VEC(14), %VEC(15))
+       subq    $-LARGE_LOAD_SIZE, %rsi
+       /* Non-temporal store vectors to rdi.  */
+       STORE_ONE_SET((%rdi), 0, %VEC(0), %VEC(1), %VEC(2), %VEC(3))
+       STORE_ONE_SET((%rdi), PAGE_SIZE, %VEC(4), %VEC(5), %VEC(6), %VEC(7))
+       STORE_ONE_SET((%rdi), PAGE_SIZE * 2, %VEC(8), %VEC(9), %VEC(10), %VEC(11))
+       STORE_ONE_SET((%rdi), PAGE_SIZE * 3, %VEC(12), %VEC(13), %VEC(14), %VEC(15))
+       subq    $-LARGE_LOAD_SIZE, %rdi
+       decl    %ecx
+       jnz     L(loop_large_memcpy_4x_inner)
+       addq    $(PAGE_SIZE * 3), %rdi
+       addq    $(PAGE_SIZE * 3), %rsi
+       decq    %r10
+       jne     L(loop_large_memcpy_4x_outer)
         sfence
-       /* Store the first 4 * VEC.  */
-       VMOVU   %VEC(4), (%rdi)
-       VMOVU   %VEC(5), VEC_SIZE(%rdi)
-       VMOVU   %VEC(6), (VEC_SIZE * 2)(%rdi)
-       VMOVU   %VEC(7), (VEC_SIZE * 3)(%rdi)
-       /* Store the last VEC.  */
-       VMOVU   %VEC(8), (%r11)
+       /* Check if only last 4 loads are needed.  */
+       cmpl    $(VEC_SIZE * 4), %edx
+       jbe     L(large_memcpy_4x_end)
+
+       /* Handle the last 4  * PAGE_SIZE bytes.  */
+L(loop_large_memcpy_4x_tail):
+       /* Copy 4 * VEC a time forward with non-temporal stores.  */
+       PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE)
+       PREFETCH_ONE_SET (1, (%rdi), PREFETCHED_LOAD_SIZE)
+       VMOVU   (%rsi), %VEC(0)
+       VMOVU   VEC_SIZE(%rsi), %VEC(1)
+       VMOVU   (VEC_SIZE * 2)(%rsi), %VEC(2)
+       VMOVU   (VEC_SIZE * 3)(%rsi), %VEC(3)
+       subq    $-(VEC_SIZE * 4), %rsi
+       addl    $-(VEC_SIZE * 4), %edx
+       VMOVA   %VEC(0), (%rdi)
+       VMOVA   %VEC(1), VEC_SIZE(%rdi)
+       VMOVA   %VEC(2), (VEC_SIZE * 2)(%rdi)
+       VMOVA   %VEC(3), (VEC_SIZE * 3)(%rdi)
+       subq    $-(VEC_SIZE * 4), %rdi
+       cmpl    $(VEC_SIZE * 4), %edx
+       ja      L(loop_large_memcpy_4x_tail)
+
+L(large_memcpy_4x_end):
+       /* Store the last 4 * VEC.  */
+       VMOVU   -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(0)
+       VMOVU   -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(1)
+       VMOVU   -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(2)
+       VMOVU   -VEC_SIZE(%rsi, %rdx), %VEC(3)
+
+       VMOVU   %VEC(0), -(VEC_SIZE * 4)(%rdi, %rdx)
+       VMOVU   %VEC(1), -(VEC_SIZE * 3)(%rdi, %rdx)
+       VMOVU   %VEC(2), -(VEC_SIZE * 2)(%rdi, %rdx)
+       VMOVU   %VEC(3), -VEC_SIZE(%rdi, %rdx)
         VZEROUPPER_RETURN
  #endif
  END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
author	noah <goldstein.w.n@gmail.com>
	Sat, 3 Apr 2021 08:12:15 +0000 (04:12 -0400)
committer	H.J. Lu <hjl.tools@gmail.com>
	Fri, 16 Apr 2021 17:06:56 +0000 (10:06 -0700)