perl
First, we use `sort -n -k3` to put the input in order of its most important field (the third column). Then, since Perl is greatly hampered by the fact that a simple scalar takes on the order of 80 bytes, we slurp the sorted output into a single 50 million × 12-byte packed buffer (12 bytes per line: each line contains 3 integers, each of which can be represented as a 32-bit integer). Then we fork off 8 worker processes (the script uses fork(), not threads), each covering (roughly) 1/8 of the data (plus some overlap).
#!perl
use strict;
use warnings;

# Usage: perl SCRIPT [INFILE]
#
# Input lines hold three whitespace-separated integers "A B T".
#
# find lines s.t. $lines[$M]->{a} == $lines[$N]->{b} and
#                 0 <= $lines[$M]->{t} - $lines[$N]->{t} < 100
# OR              $lines[$M]->{b} == $lines[$N]->{a} and
#                 0 <= $lines[$N]->{t} - $lines[$M]->{t} < 100
#
# Each match is printed as two lines: the earlier record, then the later one.
# The input is sorted numerically on its third field by sort(1), packed into
# one big string (12 bytes per record), and scanned by forked workers while
# the parent keeps reading.  Workers write to the shared STDOUT independently,
# so the relative order of pairs coming from different workers is unspecified.

my $infile = shift;

# List-form pipe open: the file name is handed to sort(1) as a single argument
# and never goes through a shell.  Without a file argument sort reads STDIN.
my @sort_cmd = ('sort', '-n', '-k3');
push @sort_cmd, '--', $infile if defined $infile;
open(my $fh, '-|', @sort_cmd) or die "open sort pipe: $!";

my $bytes_per_int  = 4;
my $bytes_per_line = $bytes_per_int * 3;
my $nlines         = 50_000_000;    # expected number of input lines
my $window         = 100;           # matches need 0 <= T difference < $window
my $nprocs         = 8;
my $group          = $nlines / $nprocs;

# Packed records ("L3": three unsigned 32-bit integers per line), preallocated
# for the expected line count.  Extra lines simply extend the string.
my $buf = "\0" x ($nlines * $bytes_per_line);

my $ln               = 0;    # index of the record currently being stored
my $scan_from        = 0;    # first record not yet assigned to any worker
my $last_group_start = 0;
my $this_group_start;
my @pids;

# Print every match whose later record has an index in [$from, $to].
# For each such record, walk backwards through the buffer until the T
# difference reaches $window.  The walk starts at the record itself, so a
# record whose A equals its own B is reported paired with itself.
# The backwards walk may go below $from: everything before $to is already in
# the buffer, and each record index is scanned by exactly one caller, so no
# pair is missed at a group boundary and none is reported twice.
sub report_matches {
    my ($from, $to) = @_;

    for my $cur ($from .. $to) {
        my ($A, $B, $T)
            = unpack 'L3', substr($buf, $cur * $bytes_per_line, $bytes_per_line);

        for (my $prev = $cur; $prev >= 0; $prev--) {
            my ($pA, $pB, $pT)
                = unpack 'L3', substr($buf, $prev * $bytes_per_line, $bytes_per_line);

            # Input is sorted on T, so once one record falls outside the
            # window all earlier ones do too.  Test before comparing so an
            # out-of-window record is never reported.
            last if $T - $pT >= $window;

            # NOTE(review): the condition in the header only describes
            # later.A == earlier.B (either direction when T is equal); the
            # second test also accepts later.B == earlier.A -- confirm this
            # is intended.
            if ($A == $pB || $B == $pA) {
                print "$pA $pB $pT\n$A $B $T\n";
            }
        }
    }
    return;
}

# TODO: uncollapse scalars until we get to just under 8GB. maybe each
# entry as a scalar would work?
while (my $line = <$fh>) {
    my ($A, $B, $T) = split ' ', $line;
    next unless defined $T;    # skip blank or short lines

    substr($buf, $ln * $bytes_per_line, $bytes_per_line, pack('L3', $A, $B, $T));

    # NOTE(review): $last_group_start holds a line index but is subtracted
    # from a T value below; this only splits the data evenly if T values
    # track line numbers -- confirm against the input format.
    if (defined $this_group_start) {
        if ($T - $last_group_start >= $group + $window) {
            my $pid = fork();
            # A failed fork returns undef; without this check the parent
            # would fall into the child branch, scan, and exit.
            die "fork: $!" unless defined $pid;

            if ($pid) {
                push @pids, $pid;
                $last_group_start = $this_group_start;
                undef $this_group_start;
                $scan_from = $ln + 1;    # the worker owns everything up to $ln
            } else {
                # Child: it has its own copy of the buffer up to record $ln.
                report_matches($scan_from, $ln);
                exit;
            }
        }
    } elsif ($T - $last_group_start >= $group) {
        $this_group_start = $ln;
    }
    $ln++;
}

# close() on a pipe handle waits for sort(1) and reports its exit status.
close $fh
    or warn $! ? "close sort pipe: $!\n" : "sort exited with status $?\n";

# Records after the last fork point were never handed to a worker; scan them
# here so the tail of the input is not silently skipped.
report_matches($scan_from, $ln - 1);

waitpid $_, 0 for @pids;