From a0eae4b4a9e0635fbb2a983673d1dd942f150ea7 Mon Sep 17 00:00:00 2001 From: eb Date: Mon, 24 Mar 2008 07:46:47 +0000 Subject: [PATCH 1/1] Fix for gcell correctness/performance problem. Replaces mfc_sync with appropriate use of tag and fenced get. We could pick up a bit of additional performance by double buffering the local store job descriptor, but that's left for a rainy day. git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@8090 221aa14e-8319-0410-a670-987f0aec2ac5 --- gcell/src/include/spu/gc_jd_queue.h | 4 +++- gcell/src/lib/runtime/spu/gc_main.c | 7 ++----- gcell/src/lib/runtime/spu/gc_spu_jd_queue.c | 11 ++++++++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/gcell/src/include/spu/gc_jd_queue.h b/gcell/src/include/spu/gc_jd_queue.h index f1ce1b3b..7a6ac2e2 100644 --- a/gcell/src/include/spu/gc_jd_queue.h +++ b/gcell/src/include/spu/gc_jd_queue.h @@ -35,12 +35,14 @@ __GC_BEGIN_DECLS * * \param[in] q is EA address of queue structure. * \param[out] item_ea is EA address of item at head of queue. + * \param[in] jd_tag is the tag to use to get the LS copy of the item. * \param[out] item is local store copy of item at head of queue. * \returns false if the queue is empty, otherwise returns true * and sets \p item_ea and DMA's job descriptor into \p item */ bool -gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item); +gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, + int jd_tag, gc_job_desc_t *item); /*! diff --git a/gcell/src/lib/runtime/spu/gc_main.c b/gcell/src/lib/runtime/spu/gc_main.c index ef552f14..867a21de 100644 --- a/gcell/src/lib/runtime/spu/gc_main.c +++ b/gcell/src/lib/runtime/spu/gc_main.c @@ -542,9 +542,6 @@ process_job(gc_eaddr_t jd_ea, gc_job_desc_t *jd) int tag = ci_tags + ci_idx; // use the current completion tag mfc_put(jd, jd_ea, sizeof(*jd), tag, 0, 0); - mfc_sync(tag); // FIXME this makes it work, but is expensive - - // Tell PPE we're done with the job. 
// // We queue these up until we run out of room, or until we can send @@ -593,7 +590,7 @@ main_loop(void) // by somebody doing something to the queue. Go look and see // if there's anything for us. // - if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, &jd)) + if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)) process_job(jd_ea, &jd); gc_jd_queue_getllar(spu_args.queue); // get a new reservation @@ -608,7 +605,7 @@ main_loop(void) #else // try to get a job from the job queue - if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, &jd)){ + if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)){ total_jobs++; gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs); diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c index ba4a1b9d..22752fe6 100644 --- a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c +++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c @@ -26,7 +26,8 @@ extern int gc_sys_tag; bool -gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item) +gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, + int jd_tag, gc_job_desc_t *item) { gc_jd_queue_t local_q; @@ -61,8 +62,12 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item) // copy in job descriptor at head of queue *item_ea = local_q.head; - mfc_get(item, local_q.head, sizeof(gc_job_desc_t), gc_sys_tag, 0, 0); - mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in + + // We must use the fence with the jd_tag to ensure that any + // previously initiated put of a job desc is locally ordered before + // the get of the new one. + mfc_getf(item, local_q.head, sizeof(gc_job_desc_t), jd_tag, 0, 0); + mfc_write_tag_mask(1 << jd_tag); // the tag we're interested in mfc_read_tag_status_all(); // wait for DMA to complete local_q.head = item->sys.next; -- 2.30.2