refactored gc_jd_queue_data, faster mutex_unlock

author eb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>

Thu, 5 Jun 2008 22:45:24 +0000 (22:45 +0000)

committer eb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>

Thu, 5 Jun 2008 22:45:24 +0000 (22:45 +0000)
author eb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>
Thu, 5 Jun 2008 22:45:24 +0000 (22:45 +0000)
committer eb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>
Thu, 5 Jun 2008 22:45:24 +0000 (22:45 +0000)
diff --git a/gcell/src/include/gc_jd_queue_data.h b/gcell/src/include/gc_jd_queue_data.h

index d48591bd2e0101ba0cd997f7ae90bb6af8825c6c..e5fa874990fb0bbbfdd11619ee9a205b723dc1a4 100644 (file)
--- a/gcell/src/include/gc_jd_queue_data.h
+++ b/gcell/src/include/gc_jd_queue_data.h
@@ -1,6 +1,6 @@
  /* -*- c++ -*- */
  /*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008 Free Software Foundation, Inc.
   * 
   * This file is part of GNU Radio
   * 
@@ -36,13 +36,31 @@ __GC_BEGIN_DECLS
   *
   * FIXME make it lock free ;)  For now, use a spin lock.
   */
-typedef struct gc_jd_queue
+
+typedef struct gc_jd_q_links
  {
    gc_eaddr_t   head _AL16;
    gc_eaddr_t   tail _AL16;
+} gc_jd_q_links_t;
+
+typedef struct gc_jd_q_mutex
+{
    uint32_t     mutex;          // libsync mutex (spin lock)
-} gc_jd_queue_t;
+  uint32_t     _pad[31];       // pad to cache line so we can use putlluc on SPE
+} _AL128 gc_jd_q_mutex_t;
  
+typedef struct gc_jd_q_flag
+{
+  uint32_t     flag;           // host writes this after enqueuing
+  uint32_t     _pad[31];       // pad to cache line
+} _AL128 gc_jd_q_flag_t;
+
+typedef struct gc_jd_queue
+{
+  gc_jd_q_links_t      l;
+  gc_jd_q_mutex_t      m;
+  gc_jd_q_flag_t       f;
+} _AL128 gc_jd_queue_t;
  
  __GC_END_DECLS
  
diff --git a/gcell/src/lib/runtime/gc_jd_queue.c b/gcell/src/lib/runtime/gc_jd_queue.c

index b5cdcac9bb9edb177bca6ebee2a5d14a55a08432..29b74c29d602ad4c30893032c9b6493317990c53 100644 (file)
--- a/gcell/src/lib/runtime/gc_jd_queue.c
+++ b/gcell/src/lib/runtime/gc_jd_queue.c
@@ -28,9 +28,10 @@
  void 
  gc_jd_queue_init(gc_jd_queue_t *q)
  {
-  _mutex_init(ptr_to_ea(&q->mutex));
-  q->head = 0;
-  q->tail = 0;
+  _mutex_init(ptr_to_ea(&q->m.mutex));
+  q->l.head = 0;
+  q->l.tail = 0;
+  q->f.flag = 0;
    smp_wmb();
  }
    
@@ -38,41 +39,44 @@ void
  gc_jd_queue_enqueue(gc_jd_queue_t *q, gc_job_desc_t *item)
  {
    item->sys.next = 0;
-  _mutex_lock(ptr_to_ea(&q->mutex));
+  _mutex_lock(ptr_to_ea(&q->m.mutex));
    smp_rmb();           // import barrier
  
-  if (q->tail == 0){    // currently empty
-    q->tail = q->head = jdp_to_ea(item);
+  if (q->l.tail == 0){    // currently empty
+    q->l.tail = q->l.head = jdp_to_ea(item);
    }
    else {               // not empty, append
-    ea_to_jdp(q->tail)->sys.next = jdp_to_ea(item);
-    q->tail = jdp_to_ea(item);
+    ea_to_jdp(q->l.tail)->sys.next = jdp_to_ea(item);
+    q->l.tail = jdp_to_ea(item);
    }
  
    smp_wmb();           // orders stores above before clearing of mutex
-  _mutex_unlock(ptr_to_ea(&q->mutex));
+  _mutex_unlock(ptr_to_ea(&q->m.mutex));
+
+  // let SPE's know we wrote something if they've got a lock-line reservation
+  q->f.flag = 1;
  }
  
  gc_job_desc_t *
  gc_jd_queue_dequeue(gc_jd_queue_t *q)
  {
-  _mutex_lock(ptr_to_ea(&q->mutex));
+  _mutex_lock(ptr_to_ea(&q->m.mutex));
    smp_rmb();           // import barrier
    
-  gc_eaddr_t item_ea = q->head;
+  gc_eaddr_t item_ea = q->l.head;
    if (item_ea == 0){   // empty
-    _mutex_unlock(ptr_to_ea(&q->mutex));
+    _mutex_unlock(ptr_to_ea(&q->m.mutex));
      return 0;
    }
  
-  q->head = ea_to_jdp(item_ea)->sys.next;
-  if (q->head == 0)    // now emtpy
-    q->tail = 0;
+  q->l.head = ea_to_jdp(item_ea)->sys.next;
+  if (q->l.head == 0)  // now emtpy
+    q->l.tail = 0;
  
    gc_job_desc_t *item = ea_to_jdp(item_ea);
    item->sys.next = 0;
  
    smp_wmb();           // orders stores above before clearing of mutex
-  _mutex_unlock(ptr_to_ea(&q->mutex));
+  _mutex_unlock(ptr_to_ea(&q->m.mutex));
    return item;
  }
diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c

index 22752fe68e17c91a674ee6fcea0d21251b506f74..0dd165fc006163cabda6e11e5ea016bfe3b2447b 100644 (file)
--- a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
+++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
@@ -25,11 +25,30 @@
  
  extern int gc_sys_tag;
  
+/*
+ * ea must be 128-byte aligned, the mutex is in the first int32_t, and
+ * it must be safe to write the remaining 124 bytes with anything at
+ * all.
+ */
+static __inline void _fast_mutex_unlock(mutex_ea_t ea)
+{
+  char _tmp[256];
+  vector signed int *buf
+    = (vector signed int *) ALIGN(_tmp, 128);  // get cache-aligned buffer
+
+  buf[0] = spu_splats(0);      // the value that unlocks the mutex
+
+  mfc_putlluc(buf, ea, 0, 0);  // unconditional put, no reservation reqd
+  spu_readch(MFC_RdAtomicStat);
+}
+
+
+
  bool
  gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
                     int jd_tag, gc_job_desc_t *item)
  {
-  gc_jd_queue_t        local_q;
+  gc_jd_q_links_t      local_q;
  
    // Before aquiring the lock, see if it's possible that there's
    // something in the queue.  Checking in this way makes it easier
@@ -37,7 +56,7 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
    // the lock unless there is something in the queue.
  
    // copy in the queue structure
-  mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
+  mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
    mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
    mfc_read_tag_status_all();           // wait for DMA to complete
  
@@ -48,15 +67,15 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
    // When we peeked, head was non-zero.  Now grab the
    // lock and do it for real.
  
-  _mutex_lock(q + offsetof(gc_jd_queue_t, mutex));
+  _mutex_lock(q + offsetof(gc_jd_queue_t, m.mutex));
  
    // copy in the queue structure
-  mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
+  mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
    mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
    mfc_read_tag_status_all();           // wait for DMA to complete
  
    if (local_q.head == 0){              // empty
-    _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex));
+    _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex));
      return false;
    }
  
@@ -77,7 +96,7 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
  
  
    // copy the queue structure back out
-  mfc_put(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
+  mfc_put(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
    mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
    mfc_read_tag_status_all();           // wait for DMA to complete
  
@@ -89,7 +108,7 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
    // a normal DMA, and that a putlluc is better than a putllc if
    // you can use it.
  
-  _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex));
+  _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex));
    return true;
  }
  
@@ -97,12 +116,12 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
  void
  gc_jd_queue_getllar(gc_eaddr_t q)
  {
-  // get reservation that includes the tail of the queue
-  gc_eaddr_t   tail = q + offsetof(gc_jd_queue_t, tail);
+  // get reservation that includes the flag in the queue
+  gc_eaddr_t   ea = q + offsetof(gc_jd_queue_t, f.flag);
      
    char _tmp[256];
    char *buf = (char *) ALIGN(_tmp, 128);       // get cache-aligned buffer
  
-  mfc_getllar(buf, ALIGN128_EA(tail), 0, 0);
+  mfc_getllar(buf, ALIGN128_EA(ea), 0, 0);
    spu_readch(MFC_RdAtomicStat);
  }
author	eb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>
	Thu, 5 Jun 2008 22:45:24 +0000 (22:45 +0000)
committer	eb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>
	Thu, 5 Jun 2008 22:45:24 +0000 (22:45 +0000)
gcell/src/include/gc_jd_queue_data.h		patch | blob | history
gcell/src/lib/runtime/gc_jd_queue.c		patch | blob | history
gcell/src/lib/runtime/spu/gc_spu_jd_queue.c		patch | blob | history