Skip to main content
New semaphore implementation
Source Link
user289860
user289860
struct mutex { int ftx; }; enum { UNLOCKED, LOCKED, CONTESTED }; void mutex_init(struct mutex *mtx) { mtx->ftx = UNLOCKED; } voidbool mutex_lockmutex_trylock(struct mutex *mtx) { ifreturn (atomic_cmpxchg(&mtx->ftx, UNLOCKED, LOCKED) == UNLOCKEDUNLOCKED; } void mutex_lock(struct mutex *mtx) { if (mutex_trylock(mtx)) return; while (atomic_xchg(&mtx->ftx, CONTESTED) != UNLOCKED) futex_wait(&mtx->ftx, CONTESTED); } void mutex_unlock(struct mutex *mtx) { if (atomic_xchg(&mtx->ftx, UNLOCKED) == CONTESTED) futex_wake(&mtx->ftx, 1); } 
struct sema { int ftx; int waiters; }; enum { LOCKED = 0, CONTESTED = -1 }; void sema_init(struct sema *sem) { sem->ftx = 0;LOCKED; sem->waiters = 0; } bool sema_trywait(struct sema *sem) { int val = atomic_load(&sem->ftx); do { if (val ==<= 0LOCKED) return false; } while ((val = atomic_cmpxchg(&sem->ftx, val, val - 1)) != val); return true; } void sema_wait(struct sema *sem) { if (sema_trywait(sem)) return; atomic_add(&sem->waiters, 1); do { atomic_cmpxchg(&sem->ftx, LOCKED, CONTESTED); futex_wait(&sem->ftx, 0CONTESTED); } while (!sema_trywait(sem)); atomic_sub(&sem->waiters, 1); } void sema_post(struct sema *sem, int n) { atomic_addint new, waiters, val = atomic_load(&sem->ftx, n); //do This{  accesses semaphore after unlocking. What if it waswaiters destroyed?= atomic_load(&sem->waiters); // You can try tonew think= about(int) how((unsigned thisint) canval be+ fixed.n + (val < LOCKED)); /} while (!atomic_cmpxchg(&sem->ftx, &val, new)); /* IThe won'tsemaphore puthas correctbeen implementationunlocked hereand duecould tobe higherdeallocated,  complexity. * so it must not be touch - hence the extra CONTESTED state */ if (atomic_load(&sem->waiters)val !=< 0LOCKED || waiters) futex_wake(&sem->ftx, n); // no point in waking more than 'n' } 
struct mutex { int ftx; }; enum { UNLOCKED, LOCKED, CONTESTED }; void mutex_init(struct mutex *mtx) { mtx->ftx = UNLOCKED; } void mutex_lock(struct mutex *mtx) { if (atomic_cmpxchg(&mtx->ftx, UNLOCKED, LOCKED) == UNLOCKED) return; while (atomic_xchg(&mtx->ftx, CONTESTED) != UNLOCKED) futex_wait(&mtx->ftx, CONTESTED); } void mutex_unlock(struct mutex *mtx) { if (atomic_xchg(&mtx->ftx, UNLOCKED) == CONTESTED) futex_wake(&mtx->ftx, 1); } 
struct sema { int ftx; int waiters; }; void sema_init(struct sema *sem) { sem->ftx = 0; sem->waiters = 0; } bool sema_trywait(struct sema *sem) { int val = atomic_load(&sem->ftx); do { if (val == 0) return false; } while ((val = atomic_cmpxchg(&sem->ftx, val, val - 1)) != val); return true; } void sema_wait(struct sema *sem) { if (sema_trywait(sem)) return; atomic_add(&sem->waiters, 1); do { futex_wait(&sem->ftx, 0); } while (!sema_trywait(sem)); atomic_sub(&sem->waiters, 1); } void sema_post(struct sema *sem, int n) { atomic_add(&sem->ftx, n); // This accesses semaphore after unlocking. What if it was destroyed? // You can try to think about how this can be fixed. // I won't put correct implementation here due to higher complexity. if (atomic_load(&sem->waiters) != 0) futex_wake(&sem->ftx, n); // no point in waking more than 'n' } 
struct mutex { int ftx; }; enum { UNLOCKED, LOCKED, CONTESTED }; void mutex_init(struct mutex *mtx) { mtx->ftx = UNLOCKED; } bool mutex_trylock(struct mutex *mtx) { return atomic_cmpxchg(&mtx->ftx, UNLOCKED, LOCKED) == UNLOCKED; } void mutex_lock(struct mutex *mtx) { if (mutex_trylock(mtx)) return; while (atomic_xchg(&mtx->ftx, CONTESTED) != UNLOCKED) futex_wait(&mtx->ftx, CONTESTED); } void mutex_unlock(struct mutex *mtx) { if (atomic_xchg(&mtx->ftx, UNLOCKED) == CONTESTED) futex_wake(&mtx->ftx, 1); } 
struct sema { int ftx; int waiters; }; enum { LOCKED = 0, CONTESTED = -1 }; void sema_init(struct sema *sem) { sem->ftx = LOCKED; sem->waiters = 0; } bool sema_trywait(struct sema *sem) { int val = atomic_load(&sem->ftx); do { if (val <= LOCKED) return false; } while ((val = atomic_cmpxchg(&sem->ftx, val, val - 1)) != val); return true; } void sema_wait(struct sema *sem) { if (sema_trywait(sem)) return; atomic_add(&sem->waiters, 1); do { atomic_cmpxchg(&sem->ftx, LOCKED, CONTESTED); futex_wait(&sem->ftx, CONTESTED); } while (!sema_trywait(sem)); atomic_sub(&sem->waiters, 1); } void sema_post(struct sema *sem, int n) { int new, waiters, val = atomic_load(&sem->ftx); do {  waiters = atomic_load(&sem->waiters); new = (int) ((unsigned int) val + n + (val < LOCKED)); } while (!atomic_cmpxchg(&sem->ftx, &val, new)); /* The semaphore has been unlocked and could be deallocated,   * so it must not be touch - hence the extra CONTESTED state */ if (val < LOCKED || waiters) futex_wake(&sem->ftx, n); } 
deleted 43 characters in body
Source Link
user289860
user289860
struct sema { int ftx; int waiters; }; void sema_init(struct sema *sem) { sem->ftx = 0; sem->waiters = 0; } bool sema_trywait(struct sema *sem) { int val = atomic_load(&sem->ftx); do { if (val == 0) return false; } while ((val = atomic_cmpxchg(&sem->ftx, val, val - 1)) != val); return true; } void sema_wait(struct sema *sem) { if (sema_trywait(sem)) return; atomic_add(&sem->waiters, 1); do { futex_wait(&sem->ftx, 0); } while (!sema_trywait(sem)); atomic_sub(&sem->waiters, 1); } void sema_post(struct sema *sem, int n) { int val = atomic_load(&sem->ftx); atomic_add(&sem->ftx, n); // This accesses semaphore after unlocking. What if it was destroyed? // You can try to think about how this can be fixed. // I won't put correct implementation here due to higher complexity. if (atomic_load(&sem->waiters) != 0) futex_wake(&sem->ftx, n); // no point in waking more than 'n' } 
struct sema { int ftx; int waiters; }; void sema_init(struct sema *sem) { sem->ftx = 0; sem->waiters = 0; } bool sema_trywait(struct sema *sem) { int val = atomic_load(&sem->ftx); do { if (val == 0) return false; } while ((val = atomic_cmpxchg(&sem->ftx, val, val - 1)) != val); return true; } void sema_wait(struct sema *sem) { if (sema_trywait(sem)) return; atomic_add(&sem->waiters, 1); do { futex_wait(&sem->ftx, 0); } while (!sema_trywait(sem)); atomic_sub(&sem->waiters, 1); } void sema_post(struct sema *sem, int n) { int val = atomic_load(&sem->ftx); atomic_add(&sem->ftx, n); // This accesses semaphore after unlocking. What if it was destroyed? // You can try to think about how this can be fixed. // I won't put correct implementation here due to higher complexity. if (atomic_load(&sem->waiters) != 0) futex_wake(&sem->ftx, n); // no point in waking more than 'n' } 
struct sema { int ftx; int waiters; }; void sema_init(struct sema *sem) { sem->ftx = 0; sem->waiters = 0; } bool sema_trywait(struct sema *sem) { int val = atomic_load(&sem->ftx); do { if (val == 0) return false; } while ((val = atomic_cmpxchg(&sem->ftx, val, val - 1)) != val); return true; } void sema_wait(struct sema *sem) { if (sema_trywait(sem)) return; atomic_add(&sem->waiters, 1); do { futex_wait(&sem->ftx, 0); } while (!sema_trywait(sem)); atomic_sub(&sem->waiters, 1); } void sema_post(struct sema *sem, int n) { atomic_add(&sem->ftx, n); // This accesses semaphore after unlocking. What if it was destroyed? // You can try to think about how this can be fixed. // I won't put correct implementation here due to higher complexity. if (atomic_load(&sem->waiters) != 0) futex_wake(&sem->ftx, n); // no point in waking more than 'n' } 
added 2 characters in body
Source Link
user289860
user289860
void WINAPI RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable ) { if (interlocked_dec_if_nonzero( (int *)&variable->Ptr )) NtReleaseKeyedEvent( keyed_event, &variable->Ptr, FALSE, NULL); } void WINAPI RtlWakeAllConditionVariable( RTL_CONDITION_VARIABLE *variable ) { int val = interlocked_xchg( (int *)&variable->Ptr, 0 ); while (val-- > 0) NtReleaseKeyedEvent( keyed_event, &variable->Ptr, FALSE, NULL); } void RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable, RTL_SRWLOCK *lock) { interlocked_xchg_add( (int *)&variable->Ptr, 1 ); RtlReleaseSRWLockExclusive( lock ); NtWaitForKeyedEvent( keyed_event, &variable->Ptr, FALSE, timeout );  RtlAcquireSRWLockExclusive( lock ); } 

Another way is to emulate futex-like functionality fromin userspace. WebKit's ParkingLot is an example of this. You can read more here. Mechanism is basically the same as with futex(2), but since it is implemented in userspace isit allows execution of custom user code from inside the queue lock. This makes it possible to store less information in synchronization object and be overall smarter with thread scheduling. Unfortunately, performance in contested case will most likely be slower, as it will use futex(2) underneath anyway to block the calling thread, so there is an additional layer of code and locking that needs to be executed.

void WINAPI RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable ) { if (interlocked_dec_if_nonzero( (int *)&variable->Ptr )) NtReleaseKeyedEvent( keyed_event, &variable->Ptr, FALSE, NULL); } void WINAPI RtlWakeAllConditionVariable( RTL_CONDITION_VARIABLE *variable ) { int val = interlocked_xchg( (int *)&variable->Ptr, 0 ); while (val-- > 0) NtReleaseKeyedEvent( keyed_event, &variable->Ptr, FALSE, NULL); } void RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable, RTL_SRWLOCK *lock) { interlocked_xchg_add( (int *)&variable->Ptr, 1 ); RtlReleaseSRWLockExclusive( lock ); NtWaitForKeyedEvent( keyed_event, &variable->Ptr, FALSE, timeout RtlAcquireSRWLockExclusive( lock ); } 

Another way is to emulate futex-like functionality from userspace. WebKit's ParkingLot is an example of this. You can read more here. Mechanism is basically the same as with futex(2), but since it is implemented in userspace is allows execution of custom user code from inside the queue lock. This makes it possible to store less information in synchronization object and be overall smarter with thread scheduling. Unfortunately, performance in contested case will most likely be slower, as it will use futex(2) underneath anyway to block the calling thread, so there is an additional layer of code and locking that needs to be executed.

void WINAPI RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable ) { if (interlocked_dec_if_nonzero( (int *)&variable->Ptr )) NtReleaseKeyedEvent( keyed_event, &variable->Ptr, FALSE, NULL); } void WINAPI RtlWakeAllConditionVariable( RTL_CONDITION_VARIABLE *variable ) { int val = interlocked_xchg( (int *)&variable->Ptr, 0 ); while (val-- > 0) NtReleaseKeyedEvent( keyed_event, &variable->Ptr, FALSE, NULL); } void RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable, RTL_SRWLOCK *lock) { interlocked_xchg_add( (int *)&variable->Ptr, 1 ); RtlReleaseSRWLockExclusive( lock ); NtWaitForKeyedEvent( keyed_event, &variable->Ptr, FALSE, timeout );  RtlAcquireSRWLockExclusive( lock ); } 

Another way is to emulate futex-like functionality in userspace. WebKit's ParkingLot is an example of this. You can read more here. Mechanism is basically the same as with futex(2), but since it is implemented in userspace it allows execution of custom user code from inside the queue lock. This makes it possible to store less information in synchronization object and be overall smarter with thread scheduling. Unfortunately, performance in contested case will most likely be slower, as it will use futex(2) underneath anyway to block the calling thread, so there is an additional layer of code and locking that needs to be executed.

added 12 characters in body
Source Link
user289860
user289860
Loading
Fix typo in sema_post
Source Link
user289860
user289860
Loading
deleted 52 characters in body
Source Link
user289860
user289860
Loading
added 10 characters in body
Source Link
user289860
user289860
Loading
deleted 6 characters in body
Source Link
user289860
user289860
Loading
added syntax-highlighting
Source Link
Deduplicator
  • 9.3k
  • 5
  • 34
  • 53
Loading
edited body
Source Link
user289860
user289860
Loading
edited body
Source Link
user289860
user289860
Loading
added 7 characters in body
Source Link
user289860
user289860
Loading
deleted 1 character in body
Source Link
user289860
user289860
Loading
added 78 characters in body
Source Link
user289860
user289860
Loading
added 141 characters in body
Source Link
user289860
user289860
Loading
added 98 characters in body
Source Link
user289860
user289860
Loading
edited body
Source Link
user289860
user289860
Loading
added 104 characters in body
Source Link
user289860
user289860
Loading
added 802 characters in body
Source Link
user289860
user289860
Loading
Source Link
user289860
user289860
Loading