mirror of
https://github.com/ioacademy-jikim/debugging
synced 2025-06-08 00:16:11 +00:00
566 lines
19 KiB
C
566 lines
19 KiB
C
/*
  This file is part of drd, a thread error detector.

  Copyright (C) 2006-2015 Bart Van Assche <bvanassche@acm.org>.

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/
|
|
|
|
|
|
#include "drd_barrier.h"
|
|
#include "drd_clientobj.h"
|
|
#include "drd_error.h"
|
|
#include "drd_suppression.h"
|
|
#include "pub_tool_errormgr.h" // VG_(maybe_record_error)()
|
|
#include "pub_tool_libcassert.h" // tl_assert()
|
|
#include "pub_tool_libcprint.h" // VG_(printf)()
|
|
#include "pub_tool_machine.h" // VG_(get_IP)()
|
|
#include "pub_tool_mallocfree.h" // VG_(malloc)(), VG_(free)()
|
|
#include "pub_tool_oset.h"
|
|
#include "pub_tool_threadstate.h" // VG_(get_running_tid)()
|
|
|
|
|
|
/* Type definitions. */
|
|
|
|
/** Information associated with one thread participating in a barrier. */
|
|
struct barrier_thread_info
|
|
{
|
|
UWord tid; // A DrdThreadId declared as UWord because
|
|
// this member variable is the key of an OSet.
|
|
Segment* sg; // Segment of the last pthread_barrier() call
|
|
// by thread tid.
|
|
Segment* post_wait_sg; // Segment created after *_barrier_wait() finished
|
|
ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
|
|
Bool thread_finished;// Whether thread 'tid' has finished.
|
|
};
|
|
|
|
|
|
/* Local functions. */
|
|
|
|
static void barrier_cleanup(struct barrier_info* p);
|
|
static void barrier_delete_thread(struct barrier_info* const p,
|
|
const DrdThreadId tid);
|
|
static const HChar* barrier_get_typename(struct barrier_info* const p);
|
|
static const HChar* barrier_type_name(const BarrierT bt);
|
|
static
|
|
void barrier_report_wait_delete_race(const struct barrier_info* const p,
|
|
const struct barrier_thread_info* const q);
|
|
|
|
|
|
/* Local variables. */
|
|
|
|
static Bool s_trace_barrier = False;
|
|
static ULong s_barrier_segment_creation_count;
|
|
|
|
|
|
/* Function definitions. */
|
|
|
|
void DRD_(barrier_set_trace)(const Bool trace_barrier)
|
|
{
|
|
s_trace_barrier = trace_barrier;
|
|
}
|
|
|
|
/**
|
|
* Initialize the structure *p with the specified thread ID and iteration
|
|
* information.
|
|
*/
|
|
static
|
|
void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
|
|
const DrdThreadId tid)
|
|
{
|
|
p->tid = tid;
|
|
p->sg = NULL;
|
|
p->post_wait_sg = 0;
|
|
p->wait_call_ctxt = 0;
|
|
p->thread_finished = False;
|
|
}
|
|
|
|
/**
|
|
* Deallocate the memory that is owned by members of
|
|
* struct barrier_thread_info.
|
|
*/
|
|
static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
|
|
{
|
|
tl_assert(p);
|
|
DRD_(sg_put)(p->sg);
|
|
DRD_(sg_put)(p->post_wait_sg);
|
|
}
|
|
|
|
/**
|
|
* Initialize the structure *p with the specified client-side barrier address,
|
|
* barrier object size and number of participants in each barrier.
|
|
*/
|
|
static
|
|
void DRD_(barrier_initialize)(struct barrier_info* const p,
|
|
const Addr barrier,
|
|
const BarrierT barrier_type,
|
|
const Word count)
|
|
{
|
|
int i;
|
|
|
|
tl_assert(barrier != 0);
|
|
tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
|
|
tl_assert(p->a1 == barrier);
|
|
|
|
p->cleanup = (void(*)(DrdClientobj*))barrier_cleanup;
|
|
p->delete_thread
|
|
= (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
|
|
p->barrier_type = barrier_type;
|
|
p->count = count;
|
|
p->pre_iteration = 0;
|
|
p->post_iteration = 0;
|
|
p->pre_waiters_left = count;
|
|
p->post_waiters_left = count;
|
|
|
|
tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
|
|
tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
|
|
>= sizeof(DrdThreadId));
|
|
for (i = 0; i < 2; i++) {
|
|
p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
|
|
VG_(free));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Deallocate the memory owned by the struct barrier_info object and also
|
|
* all the nodes in the OSet p->oset.
|
|
*
|
|
* Called by clientobj_destroy().
|
|
*/
|
|
static void barrier_cleanup(struct barrier_info* p)
|
|
{
|
|
struct barrier_thread_info* q;
|
|
Segment* latest_sg = 0;
|
|
OSet* oset;
|
|
int i;
|
|
|
|
tl_assert(p);
|
|
|
|
DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
|
|
tl_assert(latest_sg);
|
|
|
|
if (p->pre_waiters_left != p->count) {
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Destruction of barrier that is being waited"
|
|
" upon",
|
|
&bei);
|
|
} else {
|
|
oset = p->oset[1 - (p->pre_iteration & 1)];
|
|
VG_(OSetGen_ResetIter)(oset);
|
|
for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
|
|
if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
|
|
&latest_sg->vc))
|
|
{
|
|
barrier_report_wait_delete_race(p, q);
|
|
}
|
|
DRD_(barrier_thread_destroy)(q);
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 2; i++) {
|
|
VG_(OSetGen_Destroy)(p->oset[i]);
|
|
p->oset[i] = NULL;
|
|
}
|
|
|
|
DRD_(sg_put)(latest_sg);
|
|
}
|
|
|
|
/**
|
|
* Look up the client-side barrier address barrier in s_barrier[]. If not
|
|
* found, add it.
|
|
*/
|
|
static
|
|
struct barrier_info*
|
|
DRD_(barrier_get_or_allocate)(const Addr barrier,
|
|
const BarrierT barrier_type, const Word count)
|
|
{
|
|
struct barrier_info *p;
|
|
|
|
tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
|
|
|
|
tl_assert(offsetof(DrdClientobj, barrier) == 0);
|
|
p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
|
|
if (p == 0)
|
|
{
|
|
p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
|
|
DRD_(barrier_initialize)(p, barrier, barrier_type, count);
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/**
|
|
* Look up the address of the struct barrier_info associated with the
|
|
* client-side barrier object.
|
|
*/
|
|
static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
|
|
{
|
|
tl_assert(offsetof(DrdClientobj, barrier) == 0);
|
|
return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
|
|
}
|
|
|
|
/**
|
|
* Initialize a barrier with given client address, barrier type and number of
|
|
* participants. The 'reinitialization' argument indicates whether a barrier
|
|
* object is being initialized or reinitialized.
|
|
*
|
|
* Called before pthread_barrier_init().
|
|
*/
|
|
void DRD_(barrier_init)(const Addr barrier,
|
|
const BarrierT barrier_type, const Word count,
|
|
const Bool reinitialization)
|
|
{
|
|
struct barrier_info* p;
|
|
|
|
tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
|
|
|
|
if (count == 0)
|
|
{
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"pthread_barrier_init: 'count' argument is zero",
|
|
&bei);
|
|
}
|
|
|
|
if (! reinitialization && barrier_type == pthread_barrier)
|
|
{
|
|
p = DRD_(barrier_get)(barrier);
|
|
if (p)
|
|
{
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Barrier reinitialization",
|
|
&bei);
|
|
}
|
|
}
|
|
|
|
p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
|
|
|
|
if (s_trace_barrier) {
|
|
if (reinitialization)
|
|
DRD_(trace_msg)("[%u] barrier_reinit %s 0x%lx count %ld -> %ld",
|
|
DRD_(thread_get_running_tid)(),
|
|
barrier_get_typename(p), barrier, p->count, count);
|
|
else
|
|
DRD_(trace_msg)("[%u] barrier_init %s 0x%lx",
|
|
DRD_(thread_get_running_tid)(),
|
|
barrier_get_typename(p),
|
|
barrier);
|
|
}
|
|
|
|
if (reinitialization && p->count != count)
|
|
{
|
|
if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
|
|
{
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Reinitialization of barrier with active"
|
|
" waiters",
|
|
&bei);
|
|
}
|
|
p->count = count;
|
|
}
|
|
}
|
|
|
|
/** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
|
|
void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
|
|
{
|
|
struct barrier_info* p;
|
|
|
|
p = DRD_(barrier_get)(barrier);
|
|
|
|
if (s_trace_barrier)
|
|
DRD_(trace_msg)("[%u] barrier_destroy %s 0x%lx",
|
|
DRD_(thread_get_running_tid)(),
|
|
barrier_get_typename(p), barrier);
|
|
|
|
if (p == 0)
|
|
{
|
|
GenericErrInfo GEI = {
|
|
.tid = DRD_(thread_get_running_tid)(),
|
|
.addr = barrier,
|
|
};
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
GenericErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Not a barrier",
|
|
&GEI);
|
|
return;
|
|
}
|
|
|
|
if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
|
|
{
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Destruction of a barrier with active waiters",
|
|
&bei);
|
|
}
|
|
|
|
DRD_(clientobj_remove)(p->a1, ClientBarrier);
|
|
}
|
|
|
|
/** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
|
|
void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
|
|
const BarrierT barrier_type)
|
|
{
|
|
struct barrier_info* p;
|
|
struct barrier_thread_info* q;
|
|
const UWord word_tid = tid;
|
|
OSet* oset;
|
|
|
|
p = DRD_(barrier_get)(barrier);
|
|
if (p == 0 && barrier_type == gomp_barrier) {
|
|
/*
|
|
* gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
|
|
* not. The only cause I know of that can trigger this is that libgomp.so
|
|
* has been compiled with --enable-linux-futex.
|
|
*/
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Please verify whether gcc has been configured"
|
|
" with option --disable-linux-futex. See also"
|
|
" the section about OpenMP in the DRD manual.",
|
|
&bei);
|
|
}
|
|
tl_assert(p);
|
|
|
|
if (s_trace_barrier)
|
|
DRD_(trace_msg)("[%u] barrier_pre_wait %s 0x%lx iteration %ld",
|
|
DRD_(thread_get_running_tid)(),
|
|
barrier_get_typename(p), barrier, p->pre_iteration);
|
|
|
|
/* Clean up nodes associated with finished threads. */
|
|
oset = p->oset[p->pre_iteration & 1];
|
|
tl_assert(oset);
|
|
VG_(OSetGen_ResetIter)(oset);
|
|
for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
|
|
if (q->thread_finished) {
|
|
void* r = VG_(OSetGen_Remove)(oset, &q->tid);
|
|
tl_assert(r == q);
|
|
DRD_(barrier_thread_destroy)(q);
|
|
VG_(OSetGen_FreeNode)(oset, q);
|
|
VG_(OSetGen_ResetIterAt)(oset, &word_tid);
|
|
}
|
|
}
|
|
/* Allocate the per-thread data structure if necessary. */
|
|
q = VG_(OSetGen_Lookup)(oset, &word_tid);
|
|
if (q == NULL) {
|
|
q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
|
|
DRD_(barrier_thread_initialize)(q, tid);
|
|
VG_(OSetGen_Insert)(oset, q);
|
|
tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
|
|
}
|
|
|
|
/* Record *_barrier_wait() call context. */
|
|
q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
|
|
|
|
/*
|
|
* Store a pointer to the latest segment of the current thread in the
|
|
* per-thread data structure.
|
|
*/
|
|
DRD_(thread_get_latest_segment)(&q->sg, tid);
|
|
|
|
/*
|
|
* If the same number of threads as the barrier count indicates have
|
|
* called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
|
|
* reset the p->pre_waiters_left counter.
|
|
*/
|
|
if (--p->pre_waiters_left <= 0)
|
|
{
|
|
p->pre_iteration++;
|
|
p->pre_waiters_left = p->count;
|
|
}
|
|
}
|
|
|
|
/** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
|
|
void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
|
|
const BarrierT barrier_type, const Bool waited,
|
|
const Bool serializing)
|
|
{
|
|
struct barrier_info* p;
|
|
const UWord word_tid = tid;
|
|
struct barrier_thread_info* q;
|
|
struct barrier_thread_info* r;
|
|
OSet* oset;
|
|
|
|
p = DRD_(barrier_get)(barrier);
|
|
|
|
if (s_trace_barrier)
|
|
DRD_(trace_msg)("[%u] barrier_post_wait %s 0x%lx iteration %ld%s",
|
|
tid, p ? barrier_get_typename(p) : "(?)",
|
|
barrier, p ? p->post_iteration : -1,
|
|
serializing ? " (serializing)" : "");
|
|
|
|
/*
|
|
* If p == 0, this means that the barrier has been destroyed after
|
|
* *_barrier_wait() returned and before this function was called. Just
|
|
* return in that case -- race conditions between *_barrier_wait()
|
|
* and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
|
|
*/
|
|
if (p == 0)
|
|
return;
|
|
|
|
/* If the *_barrier_wait() call returned an error code, exit. */
|
|
if (! waited)
|
|
return;
|
|
|
|
oset = p->oset[p->post_iteration & 1];
|
|
q = VG_(OSetGen_Lookup)(oset, &word_tid);
|
|
if (p->pre_iteration - p->post_iteration > 1) {
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Number of concurrent pthread_barrier_wait()"
|
|
" calls exceeds the barrier count",
|
|
&bei);
|
|
} else if (q == NULL) {
|
|
BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Error in barrier implementation"
|
|
" -- barrier_wait() started before"
|
|
" barrier_destroy() and finished after"
|
|
" barrier_destroy()",
|
|
&bei);
|
|
}
|
|
if (q == NULL) {
|
|
q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
|
|
DRD_(barrier_thread_initialize)(q, tid);
|
|
VG_(OSetGen_Insert)(oset, q);
|
|
tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
|
|
DRD_(thread_get_latest_segment)(&q->sg, tid);
|
|
}
|
|
|
|
/* Create a new segment and store a pointer to that segment. */
|
|
DRD_(thread_new_segment)(tid);
|
|
DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
|
|
s_barrier_segment_creation_count++;
|
|
|
|
/*
|
|
* Combine all vector clocks that were stored in the pre_barrier_wait
|
|
* wrapper with the vector clock of the current thread.
|
|
*/
|
|
{
|
|
VectorClock old_vc;
|
|
|
|
DRD_(vc_copy)(&old_vc, DRD_(thread_get_vc)(tid));
|
|
VG_(OSetGen_ResetIter)(oset);
|
|
for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
|
|
{
|
|
if (r != q)
|
|
{
|
|
tl_assert(r->sg);
|
|
DRD_(vc_combine)(DRD_(thread_get_vc)(tid), &r->sg->vc);
|
|
}
|
|
}
|
|
DRD_(thread_update_conflict_set)(tid, &old_vc);
|
|
DRD_(vc_cleanup)(&old_vc);
|
|
}
|
|
|
|
/*
|
|
* If the same number of threads as the barrier count indicates have
|
|
* called the post *_barrier_wait() wrapper, toggle p->post_iteration and
|
|
* reset the p->post_waiters_left counter.
|
|
*/
|
|
if (--p->post_waiters_left <= 0)
|
|
{
|
|
p->post_iteration++;
|
|
p->post_waiters_left = p->count;
|
|
}
|
|
}
|
|
|
|
/** Called when thread tid stops to exist. */
|
|
static void barrier_delete_thread(struct barrier_info* const p,
|
|
const DrdThreadId tid)
|
|
{
|
|
struct barrier_thread_info* q;
|
|
const UWord word_tid = tid;
|
|
int i;
|
|
|
|
for (i = 0; i < 2; i++) {
|
|
q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
|
|
if (q)
|
|
q->thread_finished = True;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Report that *_barrier_destroy() has been called but that this call was
|
|
* not synchronized with the last *_barrier_wait() call on the same barrier.
|
|
*
|
|
* This topic has been discussed extensively on comp.programming.threads
|
|
* (February 3, 2009). See also
|
|
* <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
|
|
*/
|
|
static
|
|
void barrier_report_wait_delete_race(const struct barrier_info* const p,
|
|
const struct barrier_thread_info* const q)
|
|
{
|
|
tl_assert(p);
|
|
tl_assert(q);
|
|
|
|
{
|
|
BarrierErrInfo bei
|
|
= { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
|
|
VG_(maybe_record_error)(VG_(get_running_tid)(),
|
|
BarrierErr,
|
|
VG_(get_IP)(VG_(get_running_tid)()),
|
|
"Destruction of barrier not synchronized with"
|
|
" barrier wait call",
|
|
&bei);
|
|
}
|
|
}
|
|
|
|
static const HChar* barrier_get_typename(struct barrier_info* const p)
|
|
{
|
|
tl_assert(p);
|
|
|
|
return barrier_type_name(p->barrier_type);
|
|
}
|
|
|
|
static const HChar* barrier_type_name(const BarrierT bt)
|
|
{
|
|
switch (bt)
|
|
{
|
|
case pthread_barrier:
|
|
return "pthread barrier";
|
|
case gomp_barrier:
|
|
return "gomp barrier";
|
|
}
|
|
return "?";
|
|
}
|
|
|
|
ULong DRD_(get_barrier_segment_creation_count)(void)
|
|
{
|
|
return s_barrier_segment_creation_count;
|
|
}
|