I saw a post in chat a few days ago, of what I thought was an interesting (albeit fairly simple) statistical challenge:
By @skiwi:
If I shoot once with 50% chance of target A and 50% of target B, and if I hit target A I have 95% chance to shoot again, then how many of target A will I hit on average?
So I broke out the SQL machine and decided to write something like this, but more generic. So for the 50/50 chance I used a coin-flip-like logic, then for the subsequent rolls, I used the analogy of a dice roll, as I feel these two are clear and should be familiar to anyone.
I wrapped this into a stored procedure usp_CoinFlipAndDiceRolls.sql (the usp_ or sp_ prefix is very commonly used in databases when naming procedures).
I did as much as I could to make it set-based, but some parts, especially the repeated dice rolls, I couldn't think of a good way to do it without a loop.
Execution is still quite fast, 1-2 seconds for 10000 rows and about 5 seconds for 10000 rows. What can I improve on? Do my statistics make sense?
use PhrancisLocal; go if object_id('dbo.usp_CoinFlipAndDiceRolls') is not null drop procedure dbo.usp_CoinFlipAndDiceRolls; go set ansi_nulls on; go set quoted_identifier on; go create procedure dbo.usp_CoinFlipAndDiceRolls @numberOfRuns int = 1000, @chanceToWinDiceRoll float = null, @includeStatistics bit = 0 as begin set nocount on; /* Default dice roll to 1 chance in 6 if not passed in to the procedure as a parameter: */ if @chanceToWinDiceRoll is null set @chanceToWinDiceRoll = (1.0/6); /* CONSTANTS */ declare @CHANCE_TO_WIN_COIN_FLIP float = 0.5, @WIN bit = 1, @LOSS bit = 0; /* Temporary table which will be used to hold the results of the operations and aggregate statistics from the results to return to the caller. */ if object_id('tempdb..#CoinFlipAndDiceRolls') is not null drop table #CoinFlipAndDiceRolls; create table #CoinFlipAndDiceRolls ( row_id int identity(1,1) primary key, CoinFlipSeed float, CoinFlipWon bit, NumberOfDiceRolls int ); /* Seed the coin flip according to the number of runs desired, and populate the coingFlipWon value accordingly based on a 50% random chance */ declare @row_num int = 1, @CoinFlipSeed float = null; while (@row_num <= @numberOfRuns) begin set @CoinFlipSeed = rand(); insert into #CoinFlipAndDiceRolls ( CoinFlipSeed, CoinFlipWon, NumberOfDiceRolls ) select @CoinFlipSeed, case when @CoinFlipSeed >= @CHANCE_TO_WIN_COIN_FLIP then @WIN else @LOSS end, case when @CoinFlipSeed >= @CHANCE_TO_WIN_COIN_FLIP then 0 else null end; set @row_num = @row_num + 1; end; /* Begin logic for dice rolls: * A dice is rolled once for each run (i.e. row) where the coin flip was a Win. * Each dice roll Win triggers another dice roll, and so on until a dice roll is Lost. * The criteria for a dice roll to be a Win is determined by the @chanceToWinDiceRoll number * where a 0.25 value will result in a 25% chance to Win. * @diceRollSeed represents each individual dice roll. */ declare @diceRollSeed float = null; declare @currentRow int = ( select min(row_id) from #CoinFlipAndDiceRolls where NumberOfDiceRolls = 0 and CoinFlipWon = @WIN ); while exists ( select 1 from #CoinFlipAndDiceRolls where NumberOfDiceRolls = 0 and CoinFlipWon = @WIN ) begin set @diceRollSeed = rand(); while @diceRollSeed <= @chanceToWinDiceRoll begin update #CoinFlipAndDiceRolls set NumberOfDiceRolls = NumberOfDiceRolls + 1 where row_id = @currentRow; /* Roll a new dice value: */ set @diceRollSeed = rand(); end set @currentRow = ( select min(row_id) from #CoinFlipAndDiceRolls where NumberOfDiceRolls = 0 and CoinFlipWon = @WIN ); end; /* Return the result finished result set: */ select * from #CoinFlipAndDiceRolls order by row_id asc; /* Statistics about the finished result set, if the call includes this parameter: */ if @includeStatistics = 1 begin select [NumberOfRuns] = @numberOfRuns, [ChanceToWinDiceRoll] = @chanceToWinDiceRoll, [TotalCoinFlipWins] = sum(convert(int, CoinFlipWon)), [TotalDiceRolls] = sum(coalesce(NumberOfDiceRolls, 0)), [AverageDiceRollsPerCoinFlipWon] = (select avg(NumberOfDiceRolls) from #CoinFlipAndDiceRolls where NumberOfDiceRolls is not null) from #CoinFlipAndDiceRolls; end; /* Cleanup */ if object_id('tempdb..#CoinFlipAndDiceRolls') is not null drop table #CoinFlipAndDiceRolls; end; go Test runs
-- No params execute dbo.usp_CoinFlipAndDiceRolls; -- With params execute dbo.usp_CoinFlipAndDiceRolls @numberOfRuns = 1000, @chanceToWinDiceRoll = 0.95; -- With params & statistics execute dbo.usp_CoinFlipAndDiceRolls @numberOfRuns = 1000, @chanceToWinDiceRoll = 0.75, @includeStatistics = 1; The results look something like this (when ran with @includeStatistics on):
