PHP — 2,580,210 clues
This first blanks the last row, the last column, and the bottom-right cell of every 3×3 box. It then tries to clear each remaining cell in turn, running the board through a simple solver after each removal to ensure the board is still unambiguously solvable.
Much of the code below was modified from one of my old answers. printBoard uses 0s for empty cells.
<?php

/**
 * Propagates solved cells through a candidate grid until nothing changes.
 *
 * A cell counts as solved when it holds exactly one candidate. That value is
 * removed from every cell in the same row, column and 3x3 box, and the pass
 * is repeated until a full sweep leaves the grid unchanged.
 *
 * @param array $cand 9x9 grid; each cell is an array of remaining candidates
 * @return array the reduced candidate grid, same shape as the input
 */
function reduce($cand)
{
    do {
        $old = $cand;
        for ($r = 0; $r < 9; ++$r) {
            for ($c = 0; $c < 9; ++$c) {
                if (count($cand[$r][$c]) != 1) {
                    continue; // not solved yet, nothing to propagate
                }
                $remove = $cand[$r][$c];
                // Top-left corner of the 3x3 box containing ($r, $c).
                $boxRow = (int)($r / 3) * 3;
                $boxCol = (int)($c / 3) * 3;
                for ($i = 0; $i < 9; ++$i) {
                    $cand[$r][$i] = array_diff($cand[$r][$i], $remove);
                    $cand[$i][$c] = array_diff($cand[$i][$c], $remove);
                    // Explicit truncation keeps the indices integral; a
                    // fractional float key is only truncated implicitly
                    // (and is deprecated on PHP 8.1+).
                    $br = $boxRow + (int)($i / 3);
                    $bc = $boxCol + $i % 3;
                    $cand[$br][$bc] = array_diff($cand[$br][$bc], $remove);
                }
                // The sweep above also stripped the value from this cell
                // itself, so put it back.
                $cand[$r][$c] = $remove;
            }
        }
    } while ($old != $cand);

    return $cand;
}

/**
 * Tells whether every cell of a candidate grid has exactly one candidate.
 *
 * @param array $cand 9x9 grid of candidate arrays
 * @return bool true when the grid is completely determined
 */
function done($cand)
{
    foreach ($cand as $row) {
        foreach ($row as $cell) {
            if (count($cell) != 1) {
                return false;
            }
        }
    }

    return true;
}

/**
 * Tries to solve a board using only the candidate elimination in reduce().
 *
 * Board format: [[1,2,0,3,..],[..],..], indexed $board[$row][$col], where a
 * falsy value (0 or '0') marks an empty cell.
 *
 * @param array $board 9x9 board
 * @return array|false the completed board, or false when elimination alone
 *                     does not determine every cell
 */
function solve($board)
{
    $cand = [];
    for ($r = 0; $r < 9; ++$r) {
        for ($c = 0; $c < 9; ++$c) {
            // A given cell has a single candidate; an empty one starts with all nine.
            $cand[$r][$c] = $board[$r][$c] ? [$board[$r][$c]] : range(1, 9);
        }
    }

    $cand = reduce($cand);
    if (!done($cand)) {
        return false;
    }

    // Back to board format: every cell now holds exactly one candidate.
    $solved = [];
    for ($r = 0; $r < 9; ++$r) {
        for ($c = 0; $c < 9; ++$c) {
            $solved[$r][$c] = reset($cand[$r][$c]);
        }
    }

    return $solved;
}

/**
 * Greedily blanks cells while the board stays solvable by solve().
 *
 * Cells are visited in row-major order starting from the offset encoded in
 * $ind (row shift = $ind / 9, column shift = $ind % 9), wrapping around the
 * grid. Each filled cell is cleared; the clearing is undone if solve() can
 * no longer complete the board.
 *
 * @param array $board 9x9 board
 * @param int   $ind   starting offset for the scan
 * @return array the board with as many cells blanked as this pass allows
 */
function add_zeros($board, $ind)
{
    $rowShift = (int)($ind / 9);
    $colShift = $ind % 9;

    for ($r = 0; $r < 9; ++$r) {
        for ($c = 0; $c < 9; ++$c) {
            $R = ($r + $rowShift) % 9;
            $C = ($c + $colShift) % 9;
            if (!$board[$R][$C]) {
                continue; // already empty
            }
            $tmp = $board[$R][$C];
            $board[$R][$C] = 0;
            if (!solve($board)) {
                $board[$R][$C] = $tmp; // removal made the board unsolvable
            }
        }
    }

    return $board;
}

/**
 * Produces a reduced-clue puzzle from a completed board.
 *
 * First blanks the last row, the last column and the bottom-right cell of
 * each 3x3 box, then lets add_zeros() clear whatever else it can.
 *
 * @param array $board 9x9 board
 * @param int   $ind   starting offset forwarded to add_zeros()
 * @return array the reduced board
 */
function generate($board, $ind)
{
    // Remove last row + column. An index loop is used instead of a by-reference
    // foreach so no reference into $board is left behind.
    $board[8] = [0, 0, 0, 0, 0, 0, 0, 0, 0];
    for ($r = 0; $r < 9; ++$r) {
        $board[$r][8] = 0;
    }

    // Remove the bottom-right corner of each box not already covered above.
    $board[2][2] = $board[2][5] = $board[5][2] = $board[5][5] = 0;

    return add_zeros($board, $ind);
}

/**
 * Counts the non-empty cells of a 9x9 board.
 *
 * @param array $board 9x9 board whose cells are single digits, 0 meaning empty
 * @return int number of clues (81 minus the number of zeros)
 */
function countClues($board)
{
    $digits = implode('', array_map('implode', $board));

    return 81 - substr_count($digits, '0');
}

/**
 * Convenience wrapper: generate() with the scan starting at the first cell.
 *
 * @param array $board 9x9 board
 * @return array the reduced board
 */
function generateBoard($board)
{
    return generate($board, 0);
}

/**
 * Prints a board, one row per line with cells separated by spaces, then
 * flushes the output buffer. Empty cells are printed as 0.
 *
 * @param array $board 9x9 board
 * @return void
 */
function printBoard($board)
{
    foreach ($board as $row) {
        echo implode(' ', $row) . PHP_EOL;
    }
    flush();
}

/**
 * Parses an 81-character board string (row-major) into board format.
 *
 * Cells are returned as one-character strings. Characters past the 81st are
 * ignored.
 *
 * @param string $str at least 81 characters, nine per row
 * @return array 9x9 board of one-character strings
 * @throws InvalidArgumentException when $str has fewer than 81 characters
 */
function readBoard($str)
{
    if (strlen($str) < 81) {
        throw new InvalidArgumentException(
            'Board string must contain at least 81 characters, got ' . strlen($str)
        );
    }

    $board = [];
    foreach (array_slice(str_split($str, 9), 0, 9) as $rowChars) {
        $board[] = str_split($rowChars, 1);
    }

    return $board;
}

// testing
$inputPath = 'ppcg_sudoku_testing.txt';
$n = 0;
$f = is_readable($inputPath) ? fopen($inputPath, 'r') : false;
if ($f === false) {
    // Report the problem instead of failing inside fgets() on a bad handle.
    error_log('Cannot open ' . $inputPath . ' for reading');
} else {
    while (($l = fgets($f)) !== false) {
        $l = trim($l);
        if ($l === '') {
            continue; // tolerate blank / trailing lines
        }
        $board = readBoard($l);
        $n += countClues(generateBoard($board));
    }
    fclose($f);
    echo $n;
}