From: Dave Brolley Date: Wed, 15 Jun 2005 21:28:19 +0000 (+0000) Subject: 2005-06-15 Dave Brolley X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=06e5b33a78d631225a093611db0143b87d2edb61;p=cgen.git 2005-06-15 Dave Brolley * sid-cpu.scm (-gen-hw-stream-and-destream-fns): New function. (cgen-cpu.h): Call it. Contributed on behalf of Graydon Hoare 2001-06-05 graydon hoare * utils.scm (foldl): Define. (foldr): Define. (filter): Define. (union): Define. (intersection): Simplify. * sid.scm : Set APPLICATION to SID-SIMULATOR. (-op-gen-delayed-set-maybe-trace): Define. ( 'gen-set-{quiet,trace}): Delegate to op-gen-delayed-set-quiet etc. Note: this is still a little tangled up and needs cleaning. (-with-parallel?): Hardwire with-parallel to #t. ( 'cxmake-get): Replace with lookahead-aware code * sid-decode.scm: Remove per-insn writeback fns. (-gen-idesc-decls): Redefine sem_fn type. * sid-cpu.scm (gen-write-stack-structure): Replace parexec stuff with write stack stuff. (cgen-write.cxx): Replace per-insn writebacks with single write stack writeback. Add write stack reset function. (-gen-scache-semantic-fn insn): Replace parexec stuff with write stack stuff. * rtl-c.scm (xop): Clone operand into delayed operand if #:delayed estate attribute set. (delay): Set #:delayed attribute to calculated delay, update maximum delay of cpu, check (delay ...) usage. * operand.scm (): Add delayed slot to . * mach.scm (): Add max-delay slot to . * dev.scm (load-sid): Set APPLICATION to SID-SIMULATOR. * doc/rtl.texi (Expressions): Add section on (delay ...). --- diff --git a/ChangeLog b/ChangeLog index 3273357..ecafee4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,40 @@ +2005-06-15 Dave Brolley + + * sid-cpu.scm (-gen-hw-stream-and-destream-fns): New function. + (cgen-cpu.h): Call it. + + Contributed on behalf of Graydon Hoare + 2001-06-05 graydon hoare + + * utils.scm (foldl): Define. + (foldr): Define. + (filter): Define. + (union): Define. + (intersection): Simplify. + * sid.scm : Set APPLICATION to SID-SIMULATOR. + (-op-gen-delayed-set-maybe-trace): Define. + ( 'gen-set-{quiet,trace}): Delegate to + op-gen-delayed-set-quiet etc. Note: this is still a little tangled + up and needs cleaning. + (-with-parallel?): Hardwire with-parallel to #t. + ( 'cxmake-get): Replace with lookahead-aware code + * sid-decode.scm: Remove per-insn writeback fns. + (-gen-idesc-decls): Redefine sem_fn type. + * sid-cpu.scm (gen-write-stack-structure): Replace parexec stuff + with write stack stuff. + (cgen-write.cxx): Replace per-insn writebacks with single write + stack writeback. Add write stack reset function. + (-gen-scache-semantic-fn insn): Replace parexec stuff with write + stack stuff. + * rtl-c.scm (xop): Clone operand into delayed operand if #:delayed + estate attribute set. + (delay): Set #:delayed attribute to calculated delay, update + maximum delay of cpu, check (delay ...) usage. + * operand.scm (): Add delayed slot to . + * mach.scm (): Add max-delay slot to . + * dev.scm (load-sid): Set APPLICATION to SID-SIMULATOR. + * doc/rtl.texi (Expressions): Add section on (delay ...). + 2005-06-13 Jim Blandy * pmacros.scm (-pmacro-upcase, -pmacro-downcase): Handle symbols diff --git a/dev.scm b/dev.scm index 8141c37..aa3c579 100644 --- a/dev.scm +++ b/dev.scm @@ -132,7 +132,7 @@ (load "sid-model") (load "sid-decode") (set! verbose-level 3) - (set! APPLICATION 'SIMULATOR) + (set! APPLICATION 'SID-SIMULATOR) ) (define (load-sim) diff --git a/doc/rtl.texi b/doc/rtl.texi index c5c7a4b..32a1cdb 100644 --- a/doc/rtl.texi +++ b/doc/rtl.texi @@ -2271,6 +2271,40 @@ Indicate that there are @samp{num} delay slots in the processing of @samp{expr}. When using this rtx in instruction semantics, CGEN will infer that the instruction has the DELAY-SLOT attribute. +@item (delay num expr) +In older "sim" simulators, indicates that there are @samp{num} delay +slots in the processing of @samp{expr}. When using this rtx in instruction +semantics, CGEN will infer that the instruction has the DELAY-SLOT +attribute. + +In newer "sid" simulators, evaluates to the writeback queue for hardware +operand @samp{expr}, at @samp{num} instruction cycles in the +future. @samp{expr} @emph{must} be a hardware operand in this case. + +For example, @code{(set (delay 3 pc) (+ pc 1))} will schedule write to +the @samp{pc} register in the writeback phase of the 3rd instruction +after the current. Alternatively, @code{(set gr1 (delay 3 gr2))} will +immediately update the @samp{gr1} register with the @emph{latest write} +to the @samp{gr2} register scheduled between the present and 3 +instructions in the future. @code{(delay 0 ...)} refers to the +writeback phase of the current instruction. + +This effect is modeled with a circular buffer of "write stacks" for each +hardware element (register banks get a single stack). The size of the +circular buffer is calculated from the uses of @code{(delay ...)} +rtxs. When a delayed write occurs, the simulator pushes the write onto +the appropriate write stack in the "future" of the circular buffer for +the written-to hardware element. At the end of each instruction cycle, +the simulator executes all writes in all write stacks for the time slice +just ending. When a delayed read (essentially a pipeline bypass) occurs, +the simulator looks ahead in the circular buffer for any writes +scheduled in the future write stack. If it doesn't find one, it +progressively backs off towards the "current" instruction cycle's write +stack, and if it still finds no scheduled writes then it returns the +current state of the CPU. Thus while delayed writes are fast, delayed +reads are potentially slower in a simulator with long pipelines and very +large register banks. + @item (annul yes?) @c FIXME: put annul into the glossary. Annul the following instruction if @samp{yes?} is non-zero. This rtx is diff --git a/mach.scm b/mach.scm index 17605f1..9c31437 100644 --- a/mach.scm +++ b/mach.scm @@ -1155,13 +1155,19 @@ ; Allow a cpu family to override the isa parallel-insns spec. ; ??? Concession to the m32r port which can go away, in time. parallel-insns + + ; Computed: maximum number of insns which may pass before there + ; an insn writes back its output operands. + max-delay + ) nil) ) ; Accessors. -(define-getters cpu (word-bitsize insn-chunk-bitsize file-transform parallel-insns)) +(define-getters cpu (word-bitsize insn-chunk-bitsize file-transform parallel-insns max-delay)) +(define-setters cpu (max-delay)) ; Return endianness of instructions. @@ -1211,7 +1217,9 @@ word-bitsize insn-chunk-bitsize file-transform - parallel-insns) + parallel-insns + 0 ; default max-delay. will compute correct value + ) (begin (logit 2 "Ignoring " name ".\n") #f))) ; cpu is not to be kept diff --git a/operand.scm b/operand.scm index 99014a0..430a492 100644 --- a/operand.scm +++ b/operand.scm @@ -90,6 +90,9 @@ ; referenced. #f means the operand is always referenced by ; the instruction. (cond? . #f) + + ; whether (and by how much) this instance of the operand is delayed. + (delayed . #f) ) nil) ) @@ -135,6 +138,8 @@ (define op:set-num! (elm-make-setter 'num)) (define op:cond? (elm-make-getter 'cond?)) (define op:set-cond?! (elm-make-setter 'cond?)) +(define op:delay (elm-make-getter 'delayed)) +(define op:set-delay! (elm-make-setter 'delayed)) ; Compute the hardware type lazily. ; FIXME: op:type should be named op:hwtype or some such. diff --git a/rtl-c.scm b/rtl-c.scm index 4d21130..7daebfb 100644 --- a/rtl-c.scm +++ b/rtl-c.scm @@ -1305,7 +1305,23 @@ "bad arg to `operand'" object-or-name))) ) -(define-fn xop (estate options mode object) object) +(define-fn xop (estate options mode object) + (let ((delayed (assoc '#:delay (estate-modifiers estate)))) + (if (and delayed + (equal? APPLICATION 'SID-SIMULATOR) + (operand? object)) + ;; if we're looking at an operand inside a (delay ...) rtx, then we + ;; are talking about a _delayed_ operand, which is a different + ;; beast. rather than try to work out what context we were + ;; constructed within, we just clone the operand instance and set + ;; the new one to have a delayed value. the setters and getters + ;; will work it out. + (let ((obj (object-copy object)) + (amount (cadr delayed))) + (op:set-delay! obj amount) + obj) + ;; else return the normal object + object))) (define-fn local (estate options mode object-or-name) (cond ((rtx-temp? object-or-name) @@ -1364,9 +1380,38 @@ (cx:make VOID "; /*clobber*/\n") ) -(define-fn delay (estate options mode n rtx) - (s-sequence (estate-with-modifiers estate '((#:delay))) VOID '() rtx) ; wip! -) + +(define-fn delay (estate options mode num-node rtx) + (case APPLICATION + ((SID-SIMULATOR) + (let* ((n (cadddr num-node)) + (old-delay (let ((old (assoc '#:delay (estate-modifiers estate)))) + (if old (cadr old) 0))) + (new-delay (+ n old-delay))) + (begin + ;; check for proper usage + (if (let* ((hw (case (car rtx) + ((operand) (op:type (rtx-operand-obj rtx))) + ((xop) (op:type (rtx-xop-obj rtx))) + (else #f)))) + (not (and hw (or (pc? hw) (memory? hw) (register? hw))))) + (context-error + (estate-context estate) + (string-append + "(delay ...) rtx applied to wrong type of operand '" (car rtx) "'. should be pc, register or memory"))) + ;; signal an error if we're delayed and not in a "parallel-insns" CPU + (if (not (with-parallel?)) + (context-error + (estate-context estate) + "delayed operand in a non-parallel cpu")) + ;; update cpu-global pipeline bound + (cpu-set-max-delay! (current-cpu) (max (cpu-max-delay (current-cpu)) new-delay)) + ;; pass along new delay to embedded rtx + (rtx-eval-with-estate rtx mode (estate-with-modifiers estate `((#:delay ,new-delay))))))) + + ;; not in sid-land + (else (s-sequence (estate-with-modifiers estate '((#:delay))) VOID '() rtx)))) + ; Gets expanded as a macro. ;(define-fn annul (estate yes?) diff --git a/sid-cpu.scm b/sid-cpu.scm index 4ddc44d..a13bc51 100644 --- a/sid-cpu.scm +++ b/sid-cpu.scm @@ -1,5 +1,5 @@ ; CPU family related simulator generator, excluding decoding and model support. -; Copyright (C) 2000, 2002 Red Hat, Inc. +; Copyright (C) 2000, 2002, 2003, 2005 Red Hat, Inc. ; This file is part of CGEN. ; *********** @@ -199,6 +199,84 @@ namespace @arch@ { (-gen-hardware-struct #f (find hw-need-storage? (current-hw-list)))) ) +(define (-gen-hw-stream-and-destream-fns) + (let* ((sa string-append) + (regs (find hw-need-storage? (current-hw-list))) + (reg-dim (lambda (r) + (let ((dims (-hw-vector-dims r))) + (if (equal? 0 (length dims)) + "0" + (number->string (car dims)))))) + (write-stacks + (map (lambda (n) (sa n "_writes")) + (append (map (lambda (r) (gen-c-symbol (obj:name r))) regs) + (map (lambda (m) (sa m "_memory")) useful-mode-names)))) + (stream-reg (lambda (r) + (let ((rname (sa "hardware." (gen-c-symbol (obj:name r))))) + (if (hw-scalar? r) + (sa " ost << " rname " << ' ';\n") + (sa " for (int i = 0; i < " (reg-dim r) + "; i++)\n ost << " rname "[i] << ' ';\n"))))) + (destream-reg (lambda (r) + (let ((rname (sa "hardware." (gen-c-symbol (obj:name r))))) + (if (hw-scalar? r) + (sa " ist >> " rname ";\n") + (sa " for (int i = 0; i < " (reg-dim r) + "; i++)\n ist >> " rname "[i];\n"))))) + (stream-stacks (lambda (s) (sa " stream_stacks ( stacks." s ", ost);\n"))) + (destream-stacks (lambda (s) (sa " destream_stacks ( stacks." s ", ist);\n"))) + (stack-boilerplate + (sa + " template \n" + " void stream_stacks (const ST &st, std::ostream &ost) const\n" + " {\n" + " for (int i = 0; i < @prefix@::pipe_sz; i++)\n" + " {\n" + " ost << st[i].t << ' ';\n" + " for (int j = 0; j <= st[i].t; j++)\n" + " {\n" + " ost << st[i].buf[j].pc << ' ';\n" + " ost << st[i].buf[j].val << ' ';\n" + " ost << st[i].buf[j].idx0 << ' ';\n" + " }\n" + " }\n" + " }\n" + " \n" + " template \n" + " void destream_stacks (ST &st, std::istream &ist)\n" + " {\n" + " for (int i = 0; i < @prefix@::pipe_sz; i++)\n" + " {\n" + " ist >> st[i].t;\n" + " for (int j = 0; j <= st[i].t; j++)\n" + " {\n" + " ist >> st[i].buf[j].pc;\n" + " ist >> st[i].buf[j].val;\n" + " ist >> st[i].buf[j].idx0;\n" + " }\n" + " }\n" + " }\n" + " \n"))) + (sa + " void stream_cgen_hardware (std::ostream &ost) const \n {\n" + (string-map stream-reg regs) + " }\n" + " void destream_cgen_hardware (std::istream &ist) \n {\n" + (string-map destream-reg regs) + " }\n" + (if (with-parallel?) + (sa stack-boilerplate + " void stream_cgen_write_stacks (std::ostream &ost, " + "const @prefix@::write_stacks &stacks) const \n {\n" + (string-map stream-stacks write-stacks) + " }\n" + " void destream_cgen_write_stacks (std::istream &ist, " + "@prefix@::write_stacks &stacks) \n {\n" + (string-map destream-stacks write-stacks) + " }\n") + "")))) + + ; Generate -cpu.h (define (cgen-cpu.h) @@ -222,6 +300,8 @@ public: -gen-hardware-types + -gen-hw-stream-and-destream-fns + " // C++ register access function templates\n" "#define current_cpu this\n\n" (lambda () @@ -295,68 +375,150 @@ typedef struct { ) ) -; Utility of gen-parallel-exec-type to generate the definition of one -; structure in PAREXEC. -; SFMT is an object. - -(define (gen-parallel-exec-elm sfmt) - (string-append - " struct { /* " (obj:comment sfmt) " */\n" - (let ((sem-ops - ((if (with-parallel-write?) sfmt-out-ops sfmt-in-ops) sfmt))) - (if (null? sem-ops) - " int empty;\n" - (string-map - (lambda (op) - (logit 2 "Processing operand " (obj:name op) " of format " - (obj:name sfmt) " ...\n") - (if (with-parallel-write?) - (let ((index-type (and (op-save-index? op) - (gen-index-type op sfmt)))) - (string-append " " (gen-type op) - " " (gen-sym op) ";\n" - (if index-type - (string-append " " index-type - " " (gen-sym op) "_idx;\n") - ""))) - (string-append " " - (gen-type op) - " " - (gen-sym op) - ";\n"))) - sem-ops))) - " } " (gen-sym sfmt) ";\n" - ) -) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; begin stack-based write schedule +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define useful-mode-names '(BI QI HI SI DI UQI UHI USI UDI SF DF)) + +(define (-calculated-memory-write-buffer-size) + (let* ((is-mem? (lambda (op) (eq? (hw-sem-name (op:type op)) 'h-memory))) + (count-mem-writes + (lambda (sfmt) (length (find is-mem? (sfmt-out-ops sfmt)))))) + (apply max (append '(0) (map count-mem-writes (current-sfmt-list)))))) + + +;; note: this doesn't really correctly approximate the worst case. user-supplied functions +;; might rewrite the pipeline extensively while it's running. +;(define (-worst-case-number-of-writes-to hw-name) +; (let* ((sfmts (current-sfmt-list)) +; (out-ops (map sfmt-out-ops sfmts)) +; (pred (lambda (op) (equal? hw-name (gen-c-symbol (obj:name (op:type op)))))) +; (filtered-ops (map (lambda (ops) (find pred ops)) out-ops))) +; (apply max (cons 0 (map (lambda (ops) (length ops)) filtered-ops))))) + +(define (-hw-gen-write-stack-decl nm mode) + (let* ( +; for the time being, we're disabling this size-estimation stuff and just +; requiring the user to supply a parameter WRITE_BUF_SZ before they include -defs.h +; (pipe-sz (+ 1 (max-delay (cpu-max-delay (current-cpu))))) +; (sz (* pipe-sz (-worst-case-number-of-writes-to nm)))) + + (mode-pad (spaces (- 4 (string-length mode)))) + (stack-name (string-append nm "_writes"))) + (string-append + " write_stack< write<" mode "> >" mode-pad "\t" stack-name "\t[pipe_sz];\n"))) + + +(define (-hw-gen-write-struct-decl) + (let* ((dims (-worst-case-index-dims)) + (sa string-append) + (ns number->string) + (idxs (iota dims)) + (ctor (sa "write (PCADDR _pc, MODE _val" + (string-map (lambda (x) (sa ", USI _idx" (ns x) "=0")) idxs) + ") : pc(_pc), val(_val)" + (string-map (lambda (x) (sa ", idx" (ns x) "(_idx" (ns x) ")")) idxs) + " {} \n")) + (idx-fields (string-map (lambda (x) (sa " USI idx" (ns x) ";\n")) idxs))) + (sa + "\n\n" + " template \n" + " struct write\n" + " {\n" + " USI pc;\n" + " MODE val;\n" + idx-fields + " " ctor + " write() {}\n" + " };\n" ))) + +(define (-hw-vector-dims hw) (elm-get (hw-type hw) 'dimensions)) +(define (-worst-case-index-dims) + (apply max + (append '(1) ; for memory accesses + (map (lambda (hw) (length (-hw-vector-dims hw))) + (find (lambda (hw) (not (scalar? hw))) (current-hw-list)))))) + + +(define (-gen-writestacks) + (let* ((hw (find register? (current-hw-list))) + (modes useful-mode-names) + (hw-pairs (map (lambda (h) (list (gen-c-symbol (obj:name h)) + (obj:name (hw-mode h)))) + hw)) + (mem-pairs (map (lambda (m) (list (string-append (symbol->string m) + "_memory") m)) + modes)) + (all-pairs (append mem-pairs hw-pairs)) + + (h1 "\n\n// write stacks used in parallel execution\n\n struct write_stacks\n {\n // types of stacks\n\n") + (wb (string-append + "\n\n // unified writeback function (defined in @prefix@-write.cc)" + "\n void writeback (int tick, @cpu@::@cpu@_cpu* current_cpu);" + "\n // unified write-stack clearing function (defined in @prefix@-write.cc)" + "\n void reset ();")) + (zz "\n\n }; // end struct @prefix@::write_stacks \n\n")) + (string-append + (-hw-gen-write-struct-decl) + (foldl (lambda (s pair) (string-append s (apply -hw-gen-write-stack-decl pair))) h1 all-pairs) + wb + zz))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; end stack-based write schedule +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Generate the definition of the structure that holds register values, etc. -; for use during parallel execution. When instructions are executed parallelly -; either -; - their inputs are read before their outputs are written. Thus we have to -; fetch the input values of several instructions before executing any of them. -; - or their outputs are queued here first and then written out after all insns -; have executed. -; The fetched/queued values are stored in an array of PAREXEC structs, one -; element per instruction. - -(define (gen-parallel-exec-type) - (logit 2 "Generating PAREXEC type ...\n") - (string-append - (if (with-parallel-write?) - "/* Queued output values of an instruction. */\n" - "/* Fetched input values of an instruction. */\n") - "\ +; for use during parallel execution. + +(define (gen-write-stack-structure) + (let ((membuf-sz (-calculated-memory-write-buffer-size)) + (max-delay (cpu-max-delay (current-cpu)))) + (logit 2 "Generating write stack structure ...\n") + (string-append + " static const int max_delay = " + (number->string max-delay) ";\n" + " static const int pipe_sz = " + (number->string (+ 1 max-delay)) "; // max_delay + 1\n" -struct @prefix@_parexec { - union {\n" - (string-map gen-parallel-exec-elm (current-sfmt-list)) - "\ - } operands; - /* For conditionally written operands, bitmask of which ones were. */ - unsigned long long written; -};\n\n" - ) -) +" + template + struct write_stack + { + int t; + const int sz; + ELT buf[WRITE_BUF_SZ]; + + write_stack () : t(-1), sz(WRITE_BUF_SZ) {} + inline bool empty () { return (t == -1); } + inline void clear () { t = -1; } + inline void pop () { if (t > -1) t--;} + inline void push (const ELT &e) { if (t+1 < sz) buf [++t] = e;} + inline ELT &top () { return buf [t>0 ? ( t + inline VAL lookahead (int dist, int base, STKS &st, VAL def, int idx=0) + { + for (; dist > 0; --dist) + { + write_stack &v = st [(base + dist) % pipe_sz]; + for (int i = v.t; i > 0; --i) + if (v.buf [i].idx0 == idx) return v.buf [i]; + } + return def; + } + +" + + (-gen-writestacks) + ))) ; Generate the TRACE_RECORD struct definition. @@ -392,15 +554,27 @@ typedef struct @prefix@_trace_record { #ifndef DEFS_@PREFIX@_H #define DEFS_@PREFIX@_H +") + (if (with-parallel?) + (string-write "\ +#include +#include \"cgen-types.h\" + +// forward declaration\n\n namespace @cpu@ { -\n" +struct @cpu@_cpu; +} - (if (with-parallel?) - gen-parallel-exec-type - "") +namespace @prefix@ { - "\ -} // end @cpu@ namespace +using namespace cgen; + +" + gen-write-stack-structure + "\ +} // end @prefix@ namespace +")) + (string-write "\ #endif /* DEFS_@PREFIX@_H */\n" ) @@ -417,47 +591,80 @@ namespace @cpu@ { ; Return C code to fetch and save all output operands to instructions with ; SFMT. -(define (-gen-write-args sfmt) - (string-map (lambda (op) (op:write op sfmt)) - (sfmt-out-ops sfmt)) -) - -; Utility of gen-write-fns to generate a writer function for SFMT. -(define (-gen-write-fn sfmt) - (logit 2 "Processing write function for \"" (obj:name sfmt) "\" ...\n") - (string-list - "\nsem_status\n" - (-gen-write-fn-name sfmt) " (@cpu@_cpu* current_cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec)\n" - "{\n" - (if (with-scache?) - (gen-define-field-macro sfmt) - "") - (gen-define-parallel-operand-macro sfmt) - " @prefix@_scache* abuf = sem;\n" - " unsigned long long written = abuf->written;\n" - " PCADDR pc = abuf->addr;\n" - " PCADDR npc = 0; // dummy value for branches\n" - " sem_status status = SEM_STATUS_NORMAL; // ditto\n" - "\n" - (-gen-write-args sfmt) - "\n" - " return status;\n" - (gen-undef-parallel-operand-macro sfmt) - (if (with-scache?) - (gen-undef-field-macro sfmt) - "") - "}\n\n") -) - -(define (-gen-write-fns) - (logit 2 "Processing writer functions ...\n") - (string-write-map (lambda (sfmt) (-gen-write-fn sfmt)) - (current-sfmt-list)) -) +; Generate -write.cxx. +(define (-gen-register-writer nm mode dims) + (let* ((pad " ") + (sa string-append) + (mode (symbol->string mode)) + (idx-args (string-map (lambda (x) (sa "w.idx" (number->string x) ", ")) + (iota dims)))) + (sa pad "while (! " nm "_writes[tick].empty())\n" + pad "{\n" + pad " write<" mode "> &w = " nm "_writes[tick].top();\n" + pad " current_cpu->" nm "_set(" idx-args "w.val);\n" + pad " " nm "_writes[tick].pop();\n" + pad "}\n\n"))) + +(define (-gen-memory-writer nm mode dims) + (let* ((pad " ") + (sa string-append) + (mode (symbol->string mode)) + (idx-args (string-map (lambda (x) (sa ", w.idx" (number->string x) "")) + (iota dims)))) + (sa pad "while (! " nm "_writes[tick].empty())\n" + pad "{\n" + pad " write<" mode "> &w = " nm "_writes[tick].top();\n" + pad " current_cpu->SETMEM" mode " (w.pc" idx-args ", w.val);\n" + pad " " nm "_writes[tick].pop();\n" + pad "}\n\n"))) + + +(define (-gen-reset-fn) + (let* ((sa string-append) + (objs (append (map (lambda (h) (gen-c-symbol (obj:name h))) + (find register? (current-hw-list))) + (map (lambda (m) (sa (symbol->string m) "_memory")) + useful-mode-names))) + (clr (lambda (elt) (sa " clear_stacks (" elt "_writes);\n")))) + (sa + " template \n" + " static void clear_stacks (ST &st)\n" + " {\n" + " for (int i = 0; i < @prefix@::pipe_sz; i++)\n" + " st[i].clear();\n" + " }\n\n" + " void @prefix@::write_stacks::reset ()\n {\n" + (string-map clr objs) + " }"))) + +(define (-gen-unified-write-fn) + (let* ((hw (find register? (current-hw-list))) + (modes useful-mode-names) + (hw-triples (map (lambda (h) (list (gen-c-symbol (obj:name h)) + (obj:name (hw-mode h)) + (length (-hw-vector-dims h)))) + hw)) + (mem-triples (map (lambda (m) (list (string-append (symbol->string m) + "_memory") + m 1)) + modes))) + + (logit 2 "Generating writer function ...\n") + (string-append + " -; Generate -write.cxx. + void @prefix@::write_stacks::writeback (int tick, @cpu@::@cpu@_cpu* current_cpu) + { +" + "\n // register writeback loops\n" + (string-map (lambda (t) (apply -gen-register-writer t)) hw-triples) + "\n // memory writeback loops\n" + (string-map (lambda (t) (apply -gen-memory-writer t)) mem-triples) +" + } +"))) (define (cgen-write.cxx) (logit 1 "Generating " (gen-cpu-name) " write.cxx ...\n") @@ -479,10 +686,10 @@ namespace @cpu@ { "\ #include \"@cpu@.h\" -using namespace @cpu@; " - -gen-write-fns + -gen-reset-fn + -gen-unified-write-fn ) ) @@ -522,19 +729,14 @@ using namespace @cpu@; "sem_status\n") "@prefix@_sem_" (gen-sym insn) (if (with-parallel?) - " (@cpu@_cpu* current_cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec)\n" + (string-append " (@cpu@_cpu* current_cpu, @prefix@_scache* sem, const int tick, \n\t" + "@prefix@::write_stacks &buf)\n") " (@cpu@_cpu* current_cpu, @prefix@_scache* sem)\n") "{\n" (gen-define-field-macro (insn-sfmt insn)) - (if (with-parallel?) - (gen-define-parallel-operand-macro (insn-sfmt insn)) - "") " sem_status status = SEM_STATUS_NORMAL;\n" " @prefix@_scache* abuf = sem;\n" - ; Unconditionally written operands are not recorded here. - (if (or (with-profile?) (with-parallel-write?)) - " unsigned long long written = 0;\n" - "") + ; The address of this insn, needed by extraction and semantic code. ; Note that the address recorded in the cpu state struct is not used. ; For faster engines that copy will be out of date. @@ -543,25 +745,14 @@ using namespace @cpu@; "\n" (gen-semantic-code insn) "\n" - ; Only update what's been written if some are conditionally written. - ; Otherwise we know they're all written so there's no point in - ; keeping track. - (if (or (with-profile?) (with-parallel-write?)) - (if (-any-cond-written? (insn-sfmt insn)) - " abuf->written = written;\n" - "") - "") (if cti? " current_cpu->done_cti_insn (npc, status);\n" " current_cpu->done_insn (npc, status);\n") (if (with-parallel?) "" " return status;\n") - (if (with-parallel?) - (gen-undef-parallel-operand-macro (insn-sfmt insn)) - "") (gen-undef-field-macro (insn-sfmt insn)) - "}\n\n" + "}\n\n" )) ) @@ -659,9 +850,6 @@ using namespace @cpu@; // FIXME: namespace organization still wip (if (with-scache?) (gen-define-field-macro (insn-sfmt insn)) "") - (if parallel? - (gen-define-parallel-operand-macro (insn-sfmt insn)) - "") ; Unconditionally written operands are not recorded here. (if (or (with-profile?) (with-parallel-write?)) " unsigned long long written = 0;\n" @@ -698,9 +886,6 @@ using namespace @cpu@; // FIXME: namespace organization still wip (string-append " pbb_br_npc = npc;\n" " pbb_br_status = br_status;\n") "") - (if parallel? - (gen-undef-parallel-operand-macro (insn-sfmt insn)) - "") (if (with-scache?) (gen-undef-field-macro (insn-sfmt insn)) "") @@ -954,9 +1139,6 @@ struct @prefix@_pbb_label { " vpc = vpc + 1;\n") "") (gen-define-field-macro (sfrag-sfmt frag)) - (if parallel? - (gen-define-parallel-operand-macro (sfrag-sfmt frag)) - "") ; Unconditionally written operands are not recorded here. (if (or (with-profile?) (with-parallel-write?)) " unsigned long long written = 0;\n" @@ -997,9 +1179,6 @@ struct @prefix@_pbb_label { (string-append " pbb_br_npc = npc;\n" " pbb_br_status = br_status;\n") "") - (if parallel? - (gen-undef-parallel-operand-macro (sfrag-sfmt frag)) - "") (gen-undef-field-macro (sfrag-sfmt frag)) " }\n" (if (sfrag-trailer? frag) diff --git a/sid-decode.scm b/sid-decode.scm index 722206f..698eebe 100644 --- a/sid-decode.scm +++ b/sid-decode.scm @@ -47,10 +47,7 @@ bool @prefix@_idesc::idesc_table_initialized_p = false;\n\n" (if pbb? "0, " (string-append (-gen-sem-fn-name insn) ", ")) - "") - (if (with-parallel?) - (string-append (-gen-write-fn-name sfmt) ", ") - "") + "") "\"" (string-upcase name) "\", " (gen-cpu-insn-enum (current-cpu) insn) ", " @@ -131,25 +128,6 @@ bool @prefix@_idesc::idesc_table_initialized_p = false;\n\n" ) -;; and the same for writeback functions - -(define (-gen-write-fn-name sfmt) - (string-append "@prefix@_write_" (gen-sym sfmt)) -) - - -(define (-gen-write-fn-decls) - (string-write - "// Decls of each writeback fn.\n\n" - "using @cpu@::@prefix@_write_fn;\n" - (string-list-map (lambda (sfmt) - (string-list "extern @prefix@_write_fn " - (-gen-write-fn-name sfmt) - ";\n")) - (current-sfmt-list)) - "\n" - ) -) ; idesc, argbuf, and scache types @@ -164,14 +142,9 @@ struct @cpu@_cpu; struct @prefix@_scache; " (if (with-parallel?) - "struct @prefix@_parexec;\n" "") - (if (with-parallel?) - "typedef void (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec);" + "typedef void (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem, int tick, @prefix@::write_stacks &buf);" "typedef sem_status (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem);") "\n" - (if (with-parallel?) - "typedef sem_status (@prefix@_write_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec);" - "") "\n" " // Instruction descriptor. @@ -192,12 +165,6 @@ struct @prefix@_idesc { @prefix@_sem_fn* execute;\n\n" "") - (if (with-parallel?) - "\ - // scache write executor for this insn - @prefix@_write_fn* writeback;\n\n" - "") - "\ const char* insn_name; enum @prefix@_insn_type sem_index; @@ -300,7 +267,7 @@ struct @prefix@_scache { // argument buffer @prefix@_sem_fields fields; -" (if (or (with-any-profile?) (with-parallel-write?)) +" (if (with-any-profile?) (string-append " // writeback flags // Only used if profiling or parallel execution support enabled during @@ -549,7 +516,7 @@ struct @prefix@_scache { (let ((in-ops (find op-profilable? (sfmt-in-ops sfmt))) (out-ops (find op-profilable? (sfmt-out-ops sfmt))) ) - (if (and (null? in-ops) (null? out-ops)) + (if (or (not (with-any-profile?)) (and (null? in-ops) (null? out-ops))) "" (string-list " /* Record the fields for profiling. */\n" @@ -714,6 +681,17 @@ void #ifndef @PREFIX@_DECODE_H #define @PREFIX@_DECODE_H +" + (if (with-parallel?) + "\ +namespace @prefix@ { +// forward declaration of struct in -defs.h +struct write_stacks; +} + +" + "") +"\ namespace @cpu@ { using namespace cgen; @@ -737,10 +715,6 @@ typedef UINT @prefix@_insn_word; -gen-sem-fn-decls "") - (if (with-parallel?) - -gen-write-fn-decls - "") - "\ #endif /* @PREFIX@_DECODE_H */\n" ) diff --git a/sid.scm b/sid.scm index e40d0d5..d728e46 100644 --- a/sid.scm +++ b/sid.scm @@ -931,43 +931,6 @@ (rtl-c++ INT yes? nil #:rtl-cover-fns? #t))) ) -; For parallel write post-processing, we don't want to defer setting the pc. -; ??? Not sure anymore. -;(method-make! -; 'gen-set-quiet -; (lambda (self estate mode index selector newval) -; (-op-gen-set-quiet self estate mode index selector newval))) -;(method-make! -; 'gen-set-trace -; (lambda (self estate mode index selector newval) -; (-op-gen-set-trace self estate mode index selector newval))) - -; Name of C macro to access parallel execution operand support. - -(define -par-operand-macro "OPRND") - -; Return C code to fetch an operand's value and save it away for the -; semantic handler. This is used to handle parallel execution of several -; instructions where all inputs of all insns are read before any outputs are -; written. -; For operands, the word `read' is only used in this context. - -(define (op:read op sfmt) - (let ((estate (estate-make-for-normal-rtl-c++ nil nil))) - (send op 'gen-read estate sfmt -par-operand-macro)) -) - -; Return C code to write an operand's value. -; This is used to handle parallel execution of several instructions where all -; outputs are written to temporary spots first, and then a final -; post-processing pass is run to update cpu state. -; For operands, the word `write' is only used in this context. - -(define (op:write op sfmt) - (let ((estate (estate-make-for-normal-rtl-c++ nil nil))) - (send op 'gen-write estate sfmt -par-operand-macro)) -) - ; Default gen-read method. ; This is used to help support targets with parallel insns. ; Either this or gen-write (but not both) is used. @@ -1017,36 +980,45 @@ (method-make! 'cxmake-get (lambda (self estate mode index selector) - (let ((mode (if (mode:eq? 'DFLT mode) - (send self 'get-mode) - mode)) - (index (if index index (op:index self))) - (selector (if selector selector (op:selector self)))) - ; If the object is marked with the RAW attribute, access the hardware - ; object directly. + (let* ((mode (if (mode:eq? 'DFLT mode) + (send self 'get-mode) + mode)) + (hw (op:type self)) + (index (if index index (op:index self))) + (idx (if index (-gen-hw-index index estate) "")) + (idx-args (if (equal? idx "") "" (string-append ", " idx))) + (selector (if selector selector (op:selector self))) + (delayval (op:delay self)) + (md (mode:c-type mode)) + (name (if + (eq? (obj:name hw) 'h-memory) + (string-append md "_memory") + (gen-c-symbol (obj:name hw)))) + (getter (op:getter self)) + (def-val (cond ((obj-has-attr? self 'RAW) + (send hw 'cxmake-get-raw estate mode index selector)) + (getter + (let ((args (car getter)) + (expr (cadr getter))) + (rtl-c-expr mode expr + (if (= (length args) 0) nil + (list (list (car args) 'UINT index))) + #:rtl-cover-fns? #t + #:output-language (estate-output-language estate)))) + (else + (send hw 'cxmake-get estate mode index selector))))) + (logit 4 " cxmake-get self=" (obj:name self) " mode=" (obj:name mode) " index=" (obj:name index) " selector=" selector "\n") - (cond ((obj-has-attr? self 'RAW) - (send (op:type self) 'cxmake-get-raw estate mode index selector)) - ; If the instruction could be parallely executed with others and - ; we're doing read pre-processing, the operand has already been - ; fetched, we just have to grab the cached value. - ((with-parallel-read?) - (cx:make-with-atlist mode - (string-append -par-operand-macro - " (" (gen-sym self) ")") - nil)) ; FIXME: want CACHED attr if present - ((op:getter self) - (let ((args (car (op:getter self))) - (expr (cadr (op:getter self)))) - (rtl-c-expr mode expr - (if (= (length args) 0) - nil - (list (list (car args) 'UINT index))) - #:rtl-cover-fns? #t - #:output-language (estate-output-language estate)))) - (else - (send (op:type self) 'cxmake-get estate mode index selector))))) + + (if delayval + (cx:make mode (string-append "lookahead (" + (number->string delayval) + ", tick, " + "buf." name "_writes, " + (cx:c def-val) + idx-args ")")) + def-val))) ) @@ -1056,16 +1028,9 @@ (send (op:type op) 'gen-set-quiet estate mode index selector newval) ) -(define (-op-gen-set-quiet-parallel op estate mode index selector newval) - (string-append - (if (op-save-index? op) - (string-append " " -par-operand-macro " (" (-op-index-name op) ")" - " = " (-gen-hw-index index estate) ";\n") - "") - " " - -par-operand-macro " (" (gen-sym op) ")" - " = " (cx:c newval) ";\n") -) +(define (-op-gen-delayed-set-quiet op estate mode index selector newval) + (-op-gen-delayed-set-maybe-trace op estate mode index selector newval #f)) + (define (-op-gen-set-trace op estate mode index selector newval) (string-append @@ -1129,15 +1094,41 @@ " }\n") ) -(define (-op-gen-set-trace-parallel op estate mode index selector newval) - (string-append - " {\n" - " " (mode:c-type mode) " opval = " (cx:c newval) ";\n" - (if (op:cond? op) - (string-append " written |= (1ULL << " - (number->string (op:num op)) - ");\n") - "") +(define (-op-gen-delayed-set-trace op estate mode index selector newval) + (-op-gen-delayed-set-maybe-trace op estate mode index selector newval #t)) + +(define (-op-gen-delayed-set-maybe-trace op estate mode index selector newval do-trace?) + (let* ((pad " ") + (hw (op:type op)) + (delayval (op:delay op)) + (md (mode:c-type mode)) + (name (if + (eq? (obj:name hw) 'h-memory) + (string-append md "_memory") + (gen-c-symbol (obj:name hw)))) + (val (cx:c newval)) + (idx (if index (-gen-hw-index index estate) "")) + (idx-args (if (equal? idx "") "" (string-append ", " idx))) + ) + + (string-append + " {\n" + + (if delayval + + ;; delayed write: push it to the appropriate buffer + (string-append + pad md " opval = " val ";\n" + pad "buf." name "_writes [(tick + " (number->string delayval) + ") % @prefix@::pipe_sz].push (@prefix@::write<" md ">(pc, opval" idx-args "));\n") + + ;; else, uh, we should never have been called! + (error "-op-gen-delayed-set-maybe-trace called on non-delayed operand")) + + + (if do-trace? + + (string-append ; TRACE_RESULT_ (cpu, abuf, hwnum, opnum, value); ; For each insn record array of operand numbers [or indices into ; operand instance table]. @@ -1169,15 +1160,9 @@ "(USI) " "")) "opval << dec << \" \";\n" - (if (op-save-index? op) - (string-append " " -par-operand-macro " (" (-op-index-name op) ")" - " = " (-gen-hw-index index estate) ";\n") - "") - " " -par-operand-macro " (" (gen-sym op) ")" - " = opval;\n" " }\n") -) - + ;; else no tracing is emitted + "")))) ; Return C code to set the value of an operand. ; NEWVAL is a object of the value to store. @@ -1196,8 +1181,8 @@ (selector (if selector selector (op:selector self)))) (cond ((obj-has-attr? self 'RAW) (send (op:type self) 'gen-set-quiet-raw estate mode index selector newval)) - ((with-parallel-write?) - (-op-gen-set-quiet-parallel self estate mode index selector newval)) + ((op:delay self) + (-op-gen-delayed-set-quiet self estate mode index selector newval)) (else (-op-gen-set-quiet self estate mode index selector newval))))) ) @@ -1219,26 +1204,12 @@ (selector (if selector selector (op:selector self)))) (cond ((obj-has-attr? self 'RAW) (send (op:type self) 'gen-set-quiet-raw estate mode index selector newval)) - ((with-parallel-write?) - (-op-gen-set-trace-parallel self estate mode index selector newval)) + ((op:delay self) + (-op-gen-delayed-set-trace self estate mode index selector newval)) (else (-op-gen-set-trace self estate mode index selector newval))))) ) -; Define and undefine C macros to tuck away details of instruction format used -; in the parallel execution functions. See gen-define-field-macro for a -; similar thing done for extraction/semantic functions. - -(define (gen-define-parallel-operand-macro sfmt) - (string-append "#define " -par-operand-macro "(f) " - "par_exec->operands." - (gen-sym sfmt) - ".f\n") -) - -(define (gen-undef-parallel-operand-macro sfmt) - (string-append "#undef " -par-operand-macro "\n") -) ; Operand profiling and parallel execution support. diff --git a/utils.scm b/utils.scm index e5edce0..80312b0 100644 --- a/utils.scm +++ b/utils.scm @@ -563,6 +563,22 @@ (reverse! (list-drop n (reverse l))) ) +;; left fold +(define (foldl kons accum lis) + (if (null? lis) accum + (foldl kons (kons accum (car lis)) (cdr lis)))) + +;; right fold +(define (foldr kons knil lis) + (if (null? lis) knil + (kons (car lis) (foldr kons knil (cdr lis))))) + +;; filter list on predicate +(define (filter p ls) + (foldr (lambda (x a) (if (p x) (cons x a) a)) + '() ls)) + + ; APL's +\ operation on a vector of numbers. (define (plus-scan l) @@ -635,12 +651,13 @@ ; Return intersection of two lists. -(define (intersection l1 l2) - (cond ((null? l1) l1) - ((null? l2) l2) - ((memq (car l1) l2) (cons (car l1) (intersection (cdr l1) l2))) - (else (intersection (cdr l1) l2))) -) +(define (intersection a b) + (foldl (lambda (l e) (if (memq e a) (cons e l) l)) '() b)) + +; Return union of two lists. + +(define (union a b) + (foldl (lambda (l e) (if (memq e l) l (cons e l))) a b)) ; Return a count of the number of elements of list L1 that are in list L2. ; Uses memq.