I run into a strange issue on Mac OS X 10.8 with XCode Version 4.6.3 (4H1503) and clang: Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn) Target: x86_64-apple-darwin12.4.0 Thread model: posix
I created a very basic thread_pool class and wanted to stress test it.
struct last_item_exception {}; template<class Task> struct thread_reclaimer { explicit thread_reclaimer(impl::basic_task_queue<Task>& q)noexcept : q_(q) {} void operator()() { try { while(true) { using namespace boost; q_.wait_and_pop()(); } } catch(last_item_exception) { TRACE_LOG << "caught last item exception"; } } private: impl::basic_task_queue<Task>& q_; }; template<class Task> thread_reclaimer<Task> make_thread_reclaimer (impl::basic_task_queue<Task>& queue)noexcept { return std::move(thread_reclaimer<Task>(queue)); } class thread_pool : public boost::noncopyable { typedef std::function<void()> task_type; public: explicit thread_pool(size_t number_of_threads) : threads_created_() , pool_() , queue_() , done_() { try { TRACE_LOG << "thread_pool: creating " <<number_of_threads << " threads"; while(threads_created_<number_of_threads) { ++threads_created_; pool_.create_thread(make_thread_reclaimer(queue_)); TRACE_LOG << "thread_pool: created thread number: " << threads_created_ ; } TRACE_LOG << "thread_pool: all threads started"; } catch(...) { TRACE_LOG << "thread_pool: exception occured"; finish_and_join_all(); throw; } TRACE_LOG << "thread_pool: constructor finished"; } ~thread_pool() { finish_and_join_all(); } bool enqueue(task_type t) { if(done_) return false; queue_.push(t, done_); return !done_; //if done was set inbetween, no push occured! } void finish_and_join_all() { TRACE_LOG << "entered: finish_and_join_all & done is: " << done_; if(done_) return; done_ = true; std::vector<task_type> cancellation_tasks ( threads_created_ , [] { TRACE_LOG << "throwing last item exception"; throw impl::last_item_exception(); } ) ; TRACE_LOG << "pushing last item to the queue"; // atomically pushes all cancellation tasks queue_.push(cancellation_tasks.begin(), cancellation_tasks.end()); threads_created_ = 0; TRACE_LOG << "waiting for all threads to finish"; pool_.join_all(); } private: size_t threads_created_; boost::thread_group pool_; //thread safe producer consumer queue, which uses condition variables for sync impl::basic_task_queue<std::function<void()>> queue_; std::atomic<bool> done_; }; My test case creates and destroys a thread_pool object with 1 thread, i.e.
for(size_t i = 0; i<100; ++i) thread_pool pool(1); On second loop iteration test case fails because done_ upon entering first time finish_and_join_all() is true. Changing std::atomic<bool> type to volatile bool fixes the problem.
The output of the test is below (which causes a condition_variable) failure, because destructor did not wait for all threads to finish:
Entering test case "ctor_stress_test" Assertion failed: (!ret), function ~condition_variable, file /usr/local/include/boost/thread/pthread/condition_variable_fwd.hpp, line 86. [2013-07-03 22:17:32.462265] [0x76f73180] [trace] thread_pool: creating 1 threads [2013-07-03 22:17:32.462303] [0x76f73180] [trace] thread_pool: created thread number: 1 [2013-07-03 22:17:32.462312] [0x76f73180] [trace] thread_pool: all threads started [2013-07-03 22:17:32.462320] [0x76f73180] [trace] thread_pool: constructor finished [2013-07-03 22:17:32.462326] [0x76f73180] [trace] entered: finish_and_join_all & done is: false [2013-07-03 22:17:32.462332] [0x76f73180] [trace] pushing last item to the queue [2013-07-03 22:17:32.462357] [0x76f73180] [trace] waiting for all threads to finish [2013-07-03 22:17:32.462383] [0x10d9e000] [trace] throwing last item exception [2013-07-03 22:17:32.462404] [0x10d9e000] [trace] caught last item exception [2013-07-03 22:17:32.462434] [0x76f73180] [trace] executing ~basic_task_queue [2013-07-03 22:17:32.462447] [0x76f73180] [trace] thread_pool: creating 1 threads [2013-07-03 22:17:32.462474] [0x76f73180] [trace] thread_pool: created thread number: 1 [2013-07-03 22:17:32.462480] [0x76f73180] [trace] thread_pool: all threads started [2013-07-03 22:17:32.462485] [0x76f73180] [trace] thread_pool: constructor finished [2013-07-03 22:17:32.462490] [0x76f73180] [trace] entered: finish_and_join_all & done is: true [2013-07-03 22:17:32.462495] [0x76f73180] [trace] executing ~basic_task_queue unknown location:0: fatal error in "ctor_stress_test": signal: SIGABRT (application abort requested) Leaving test case "ctor_stress_test"; testing time: 448mks Is this an intended behaviour of std::atomic?