Skip to main content
added 12039 characters in body
Source Link
skibrianski
  • 1.3k
  • 7
  • 10

perl, 17m46s on a core i7 w/ 8GB mem

First, we use sort -n -k3 to get the most important field in order, taking advantage of the built-in parallelism on modern versions of sort(1). Then, since perl is greatly hampered by the fact that a simple scalar takes on the order of 80 bytes each (50 million * 3 * 80 is too much - at least 12GB), we slurp the output in to a 50 million * 12 byte array (12 bytes per line, each line contains 3 integers that can be represented as a 32 bit integer). Then we fire off 8 threads covering (roughly) 1/8ths8th of the data each (+ some overlap).

#!perl use strict; use warnings; # find lines s.t. $lines[$M]->{a} == $lines[$N]->{b} and # 0 <= $lines[$M]->{t} - $lines[$N]->{t} < 100 # OR $lines[$M]->{b} == $lines[$N]->{a} and # 0 <= $lines[$N]->{t} - $lines[$M]->{t} < 100 my $infile = shift; open(my $fh, "sort -n -k3 $infile |") || die "open sort pipe: $@"; my @lines; my $bytes_per_int = 4; my $bytes_per_line = $bytes_per_int * 3; my $nlines = 50_000_000; my $buf = "\0" x ($nlines * $bytes_per_line); my $ln = 0; my $nprocs = 8; my $last_group_start = 0; my $this_group_start; my $group = $nlines / $nprocs; my @pids; # TODO: uncollapse scalars until we get to just under 8GB. maybe each # entry as a scalar would work? while(<$fh>) { my ($A, $B, $T) = split/\s+/; substr($buf, $ln * $bytes_per_line, $bytes_per_line, pack "L3", ($A, $B, $T)); if( defined $this_group_start ) { if( $T - $last_group_start >= $group + 100 ) { if(my $pid = fork()) { push @pids, $pid; $last_group_start = $this_group_start; undef $this_group_start; } else { #warn "checking $last_group_start - $ln...\n"; for(my $l=$last_group_start; $l<=$ln; ++$l) { my $lpos = $l * $bytes_per_line; my ($A, $B, $T) = unpack "L3", substr($buf, $lpos, $bytes_per_line); my ($lA, $lB); my $lT = $T; for(my $lb=$l; $lb>=$last_group_start && $T - $lT <= 100; $lb--, $lpos -= $bytes_per_line) { ($lA, $lB, $lT) = unpack "L3", substr($buf, $lpos, $bytes_per_line); if($A == $lB || $B == $lA) { #print "($last_group_start) $A $B $T matches $lA $lB $lT\n"; print "$lA $lB $lT\n$A $B $T\n"; } } } exit; } } } elsif( !defined $this_group_start && $T - $last_group_start >= $group ) { #warn "this_group_start line: $ln, T: $T, last_group_start: $last_group_start";  $this_group_start = $ln; } $ln++; } waitpid $_, 0 for @pids; 

Unsorted output:

8455767 30937130 50130 20468509 8455767 50175 47249523 17051933 111141 17051933 34508661 111215 39504040 36752393 196668 42758015 39504040 196685 25072294 28422439 329284 35458609 25072294 329375 45340163 42711710 6480186 39315845 45340163 6480248 1435779 49643646 12704996 38229692 1435779 12705039 18487099 24556657 6665821 24556657 28498505 6665884 6330540 35363455 18877328 22500774 6330540 18877347 10236123 22026399 598647 39941282 10236123 598717 45756517 24831687 6726642 34578158 45756517 6726670 29385533 7181838 621179 7181838 29036551 621189 40647929 11895227 25075557 11895227 1900895 25075652 17921258 42642822 18935923 40140275 17921258 18935949 44573044 38139831 12899467 38139831 1321655 12899468 11223983 1788656 12920946 1788656 21905607 12921040 1357565 8148234 801402 8148234 46556089 801498 30929735 303373 19105532 31258424 30929735 19105543 34899776 9929507 6990057 9929507 49221343 6990078 49779853 43951357 25306335 41120244 49779853 25306424 6177313 41551055 25343755 24462722 6177313 25343804 16392217 32915797 31472388 32915797 19696674 31472479 6834305 36264354 25440771 44983650 6834305 25440800 26559923 47360227 19356637 47360227 49749757 19356700 33018256 36233269 37654651 36233269 5459333 37654671 6932997 23123567 25502355 23123567 7882426 25502356 5878434 43421728 25510707 43421728 40827189 25510765 38695636 33504665 1099515 13504170 38695636 1099605 32832720 40188845 37689854 8335398 32832720 37689927 35858995 41917651 1130028 41917651 28797444 1130096 47102665 6796460 43806189 6796460 6113288 43806229 21248273 5422675 43819677 48011830 21248273 43819728 32187324 39177373 25624030 39177373 42539402 25624102 41722647 14351373 25626925 14351373 45070518 25627013 22298566 25860163 37862683 2273777 22298566 37862692 10617763 32776583 7561272 35581425 10617763 7561279 18526541 18709244 31960780 18709244 32777622 31960867 36976439 24222624 31973215 24222624 9534777 31973262 25751007 11612449 38066826 43652333 25751007 38066923 8303520 2615666 7633297 2615666 29961938 7633357 22317573 31811902 31982722 14298221 22317573 31982819 43089781 7653813 44154683 8732322 43089781 44154769 24227311 43800700 13711475 40906680 24227311 13711539 48061947 30109196 7660402 43993467 48061947 7660488 29580639 5292950 38140285 5292950 21293538 38140356 17646232 47737600 32058831 47737600 42934248 32058836 13262640 23462343 1617194 23462343 1901587 1617259 5150775 7046596 44270140 7046596 22819218 44270181 17749796 34924638 32171251 8386063 17749796 32171346 30095973 12202864 38257881 12202864 42679593 38257912 10353022 40646034 26158412 40646034 36237182 26158412 8416485 16245525 32223010 16245525 32420032 32223081 20420340 1371966 7893319 1371966 2031617 7893335 2864137 20279212 26199008 29145409 2864137 26199080 29141766 19729396 44433106 44115780 29141766 44433141 6513924 34515379 32283579 12686666 6513924 32283636 20116056 49736865 44464394 49736865 47918939 44464416 38212450 3465543 32302772 3465543 39217131 32302873 12019664 37367876 44485630 3639658 12019664 44485639 18053021 1279896 7973955 2220749 18053021 7974031 19701732 12984505 1857435 24625926 19701732 1857528 9876789 34881917 26285125 27687743 9876789 26285134 5696632 6064263 44534580 34888313 5696632 44534629 14865531 46418593 38457138 5929897 14865531 38457191 44378135 4051962 38485208 4051962 10804515 38485308 11865822 21793388 14142622 7760360 11865822 14142649 32333570 24478420 44702533 24478420 23749609 44702588 29098286 25015092 44723985 32171647 29098286 44723985 20522503 20522503 2127735 20522503 20522503 2127735 22597975 20938239 8260902 20938239 48618802 8260905 8310032 34659671 2153994 34659671 25406149 2154075 49085033 5708432 26644257 5708432 32265692 26644305 18751513 18226037 32726402 18226037 33885794 32726424 45877488 23211339 20566948 23211339 26209405 20567002 48554034 25770643 38853402 9683274 48554034 38853467 9770420 14556349 2309265 27255587 9770420 2309324 32926392 16744099 44954824 24840989 32926392 44954840 29066838 49434549 26755357 49434549 12635292 26755407 21927714 32352409 20626921 32352409 15895076 20626932 7422009 23559357 14550898 32743911 7422009 14550982 38816601 5850890 26851026 5850890 32996623 26851107 42148171 47021378 26872907 47021378 32628418 26872908 9850929 10501741 32998960 10501741 24899993 32999043 27491904 4393602 33033499 4393602 17712085 33033570 37978226 42226216 39114479 42226216 2511412 39114525 42859989 49908919 45241083 48131208 42859989 45241088 39753103 30674979 14807321 30674979 45637890 14807371 30154199 11988643 2641926 11988643 11241926 2641976 7191871 13518594 45370275 13518594 45354921 45370344 54745 19711137 8871851 24814115 54745 8871937 38770495 34574748 2756244 41962321 38770495 2756337 26229406 39306415 21057327 10735951 26229406 21057347 46704290 11506122 39359422 18181795 46704290 39359481 38796645 28410469 45452212 28410469 13478996 45452222 412456 27727741 39466147 27727741 19639136 39466226 24470627 13030982 21266756 13030982 21713410 21266825 6058593 23139172 27435254 19236012 6058593 27435303 14457750 39190113 39701131 30253141 14457750 39701227 26898421 39016446 45812750 40952330 26898421 45812829 18647206 27663400 45817956 27663400 21728474 45817989 5559358 41319001 33664547 41319001 37210583 33664636 29066692 30653068 39759813 30653068 38963132 39759856 12086617 49971187 3232640 49971187 32302154 3232649 12008399 13656671 3239395 43088998 12008399 3239439 10061612 38594475 39804389 38594475 6327106 39804405 16703379 21150436 39851057 21150436 34093320 39851136 1035486 4199407 3314170 26974438 1035486 3314196 21869320 14532221 33851404 15208937 21869320 33851473 38840190 4742355 3402401 4742355 46055869 3402462 34432016 8734566 39966972 27614117 34432016 39967002 9988172 49209666 46063078 49209666 29374155 46063087 3208946 47030309 21722002 47030309 39809983 21722030 10928661 46423741 3496854 46423741 29486710 3496862 42464855 22978174 46154827 22978174 3814497 46154901 47090840 16768393 46169667 39523858 47090840 46169714 28186104 11618234 34024001 11618234 33711158 34024019 45471813 37332848 3585557 37332848 4607526 3585600 14885742 38990612 15863749 38990612 3710491 15863779 42391514 33643913 22005928 33643913 32254640 22006022 4299590 19482026 34202327 19482026 35838894 34202406 24298776 16276160 3858885 16276160 3198758 3858958 29322567 12536696 40433239 12536696 26083938 40433317 16080151 9648322 22221443 9648322 43846385 22221458 999302 19218350 10078183 10296062 999302 10078189 40544377 34492433 34463953 19908418 40544377 34463993 10765321 45143043 34542584 39154522 10765321 34542646 48642526 31097951 4104790 2940654 48642526 4104887 26972730 47422139 46846889 39228577 26972730 46846901 13788696 11503551 34728076 11503551 9151627 34728130 8676030 30463644 10406398 15204754 8676030 10406405 42984277 41087708 34805119 48741576 42984277 34805143 29634598 2151247 22699609 12264074 29634598 22699614 47525963 48470003 16667878 48470003 4566846 16667953 9725907 43325112 4498307 26465445 9725907 4498368 306967 11708860 10633595 11708860 31017081 10633669 39420965 46595640 41089015 46595640 41260374 41089048 29232745 39705052 16754836 4739295 29232745 16754840 35246405 42811088 41273637 48986699 35246405 41273719 2398239 36985098 35181790 36985098 7460784 35181841 18955749 23678549 35221035 47264406 18955749 35221129 18105816 26003002 17044057 26003002 17467477 17044087 14430126 46039962 47492180 46039962 29118827 47492275 30329324 40926812 41425850 43304610 30329324 41425912 34966996 36567528 17095113 3967517 34966996 17095144 42829171 42530474 23209891 25923738 42829171 23209967 28187681 26297990 35474412 48986691 28187681 35474475 5707126 41598794 17298139 40466899 5707126 17298188 28838696 30725820 5142797 30725820 35360418 5142798 44642019 42570370 17339657 42570370 19022469 17339727 42193681 8389736 17386517 48906013 42193681 17386586 42303185 30337820 41795129 30337820 42473956 41795170 30935782 8441903 17515229 41549758 30935782 17515275 41239019 10011768 23619001 10011768 25386353 23619062 494288 13341166 29815779 49113152 494288 29815876 7106674 26227442 29833029 47459682 7106674 29833047 17246497 35389391 17628365 35389391 34005133 17628371 23347674 48243185 17792799 48243185 22907892 17792836 21852744 1662414 36088704 8040124 21852744 36088775 32384657 27122374 36100767 24980361 32384657 36100782 31016207 26300043 42222489 26300043 36869529 42222544 17178756 44315094 42223989 44315094 11222466 42224042 34139317 39164101 36197907 39164101 27563542 36197947 31638631 22215137 17999735 22215137 10771707 17999769 30257199 32883043 24127009 32883043 179099 24127047 47774058 17451960 30283073 44583527 47774058 30283162 13816647 12695130 24145102 12695130 42284941 24145188 42749234 20004242 5893793 20004242 38129713 5893819 22210359 22178109 18109989 22178109 112961 18110049 42509645 28599506 42508465 28599506 3722411 42508513 34412629 22547405 48610262 22547405 16664124 48610296 2330283 32267749 24256113 35915758 2330283 24256157 44560231 49353986 12101694 6471293 44560231 12101780 23289721 8186827 30407293 10624448 23289721 30407389 12329357 35765163 30560085 4511908 12329357 30560158 31332240 39704929 12269193 39704929 47770487 12269249 22286152 22082044 36734758 22082044 25076919 36734833 47381309 9459604 36735886 9459604 31071680 36735890 43832763 45342283 30707519 45342283 26992816 30707602 2883029 18642608 42989696 14697025 2883029 42989793 15149987 40746227 24700535 40746227 34776566 24700549 2387554 49015265 43057085 49015265 21103141 43057139 23057202 13308993 30982514 34596334 23057202 30982553 44598498 31714790 43285828 18170064 44598498 43285841 38273701 11976319 31179763 15344094 38273701 31179764 3651338 27427037 37188945 12876654 3651338 37189007 10081580 3418061 37221143 3418061 38353019 37221143 172544 18699860 37295343 824744 172544 37295372 13914 8890169 37303853 8890169 14008003 37303898 18716557 29456130 49605004 29456130 16390535 49605083 15398102 22446674 43711290 22446674 38760679 43711383 

I'm sure this would be an order of magnitude faster in C, but I probably won't take the time to do so.

perl

First, we use sort -n -k3 to get the most important field in order. Then, since perl is greatly hampered by the fact that a simple scalar takes on the order of 80 bytes, we slurp the output in to a 50 million * 12 byte array (12 bytes per line, each line contains 3 integers that can be represented as a 32 bit integer). Then we fire off 8 threads covering (roughly) 1/8ths of the data each (+ some overlap).

#!perl use strict; use warnings; # find lines s.t. $lines[$M]->{a} == $lines[$N]->{b} and # 0 <= $lines[$M]->{t} - $lines[$N]->{t} < 100 # OR $lines[$M]->{b} == $lines[$N]->{a} and # 0 <= $lines[$N]->{t} - $lines[$M]->{t} < 100 my $infile = shift; open(my $fh, "sort -n -k3 $infile |") || die "open sort pipe: $@"; my @lines; my $bytes_per_int = 4; my $bytes_per_line = $bytes_per_int * 3; my $nlines = 50_000_000; my $buf = "\0" x ($nlines * $bytes_per_line); my $ln = 0; my $nprocs = 8; my $last_group_start = 0; my $this_group_start; my $group = $nlines / $nprocs; my @pids; # TODO: uncollapse scalars until we get to just under 8GB. maybe each # entry as a scalar would work? while(<$fh>) { my ($A, $B, $T) = split/\s+/; substr($buf, $ln * $bytes_per_line, $bytes_per_line, pack "L3", ($A, $B, $T)); if( defined $this_group_start ) { if( $T - $last_group_start >= $group + 100 ) { if(my $pid = fork()) { push @pids, $pid; $last_group_start = $this_group_start; undef $this_group_start; } else { #warn "checking $last_group_start - $ln...\n"; for(my $l=$last_group_start; $l<=$ln; ++$l) { my $lpos = $l * $bytes_per_line; my ($A, $B, $T) = unpack "L3", substr($buf, $lpos, $bytes_per_line); my ($lA, $lB); my $lT = $T; for(my $lb=$l; $lb>=$last_group_start && $T - $lT <= 100; $lb--, $lpos -= $bytes_per_line) { ($lA, $lB, $lT) = unpack "L3", substr($buf, $lpos, $bytes_per_line); if($A == $lB || $B == $lA) { #print "($last_group_start) $A $B $T matches $lA $lB $lT\n"; print "$lA $lB $lT\n$A $B $T\n"; } } } exit; } } } elsif( !defined $this_group_start && $T - $last_group_start >= $group ) { #warn "this_group_start line: $ln, T: $T, last_group_start: $last_group_start";  $this_group_start = $ln; } $ln++; } waitpid $_, 0 for @pids; 

perl, 17m46s on a core i7 w/ 8GB mem

First, we use sort -n -k3 to get the most important field in order, taking advantage of the built-in parallelism on modern versions of sort(1). Then, since perl is greatly hampered by the fact that a simple scalar takes on the order of 80 bytes each (50 million * 3 * 80 is too much - at least 12GB), we slurp the output in to a 50 million * 12 byte array (12 bytes per line, each line contains 3 integers that can be represented as a 32 bit integer). Then we fire off 8 threads covering (roughly) 1/8th of the data each (+ some overlap).

#!perl use strict; use warnings; # find lines s.t. $lines[$M]->{a} == $lines[$N]->{b} and # 0 <= $lines[$M]->{t} - $lines[$N]->{t} < 100 # OR $lines[$M]->{b} == $lines[$N]->{a} and # 0 <= $lines[$N]->{t} - $lines[$M]->{t} < 100 my $infile = shift; open(my $fh, "sort -n -k3 $infile |") || die "open sort pipe: $@"; my @lines; my $bytes_per_int = 4; my $bytes_per_line = $bytes_per_int * 3; my $nlines = 50_000_000; my $buf = "\0" x ($nlines * $bytes_per_line); my $ln = 0; my $nprocs = 8; my $last_group_start = 0; my $this_group_start; my $group = $nlines / $nprocs; my @pids; while(<$fh>) { my ($A, $B, $T) = split/\s+/; substr($buf, $ln * $bytes_per_line, $bytes_per_line, pack "L3", ($A, $B, $T)); if( defined $this_group_start ) { if( $T - $last_group_start >= $group + 100 ) { if(my $pid = fork()) { push @pids, $pid; $last_group_start = $this_group_start; undef $this_group_start; } else { #warn "checking $last_group_start - $ln...\n"; for(my $l=$last_group_start; $l<=$ln; ++$l) { my $lpos = $l * $bytes_per_line; my ($A, $B, $T) = unpack "L3", substr($buf, $lpos, $bytes_per_line); my ($lA, $lB); my $lT = $T; for(my $lb=$l; $lb>=$last_group_start && $T - $lT <= 100; $lb--, $lpos -= $bytes_per_line) { ($lA, $lB, $lT) = unpack "L3", substr($buf, $lpos, $bytes_per_line); if($A == $lB || $B == $lA) { #print "($last_group_start) $A $B $T matches $lA $lB $lT\n"; print "$lA $lB $lT\n$A $B $T\n"; } } } exit; } } } elsif( !defined $this_group_start && $T - $last_group_start >= $group ) { $this_group_start = $ln; } $ln++; } waitpid $_, 0 for @pids; 

Unsorted output:

8455767 30937130 50130 20468509 8455767 50175 47249523 17051933 111141 17051933 34508661 111215 39504040 36752393 196668 42758015 39504040 196685 25072294 28422439 329284 35458609 25072294 329375 45340163 42711710 6480186 39315845 45340163 6480248 1435779 49643646 12704996 38229692 1435779 12705039 18487099 24556657 6665821 24556657 28498505 6665884 6330540 35363455 18877328 22500774 6330540 18877347 10236123 22026399 598647 39941282 10236123 598717 45756517 24831687 6726642 34578158 45756517 6726670 29385533 7181838 621179 7181838 29036551 621189 40647929 11895227 25075557 11895227 1900895 25075652 17921258 42642822 18935923 40140275 17921258 18935949 44573044 38139831 12899467 38139831 1321655 12899468 11223983 1788656 12920946 1788656 21905607 12921040 1357565 8148234 801402 8148234 46556089 801498 30929735 303373 19105532 31258424 30929735 19105543 34899776 9929507 6990057 9929507 49221343 6990078 49779853 43951357 25306335 41120244 49779853 25306424 6177313 41551055 25343755 24462722 6177313 25343804 16392217 32915797 31472388 32915797 19696674 31472479 6834305 36264354 25440771 44983650 6834305 25440800 26559923 47360227 19356637 47360227 49749757 19356700 33018256 36233269 37654651 36233269 5459333 37654671 6932997 23123567 25502355 23123567 7882426 25502356 5878434 43421728 25510707 43421728 40827189 25510765 38695636 33504665 1099515 13504170 38695636 1099605 32832720 40188845 37689854 8335398 32832720 37689927 35858995 41917651 1130028 41917651 28797444 1130096 47102665 6796460 43806189 6796460 6113288 43806229 21248273 5422675 43819677 48011830 21248273 43819728 32187324 39177373 25624030 39177373 42539402 25624102 41722647 14351373 25626925 14351373 45070518 25627013 22298566 25860163 37862683 2273777 22298566 37862692 10617763 32776583 7561272 35581425 10617763 7561279 18526541 18709244 31960780 18709244 32777622 31960867 36976439 24222624 31973215 24222624 9534777 31973262 25751007 11612449 38066826 43652333 25751007 38066923 8303520 2615666 7633297 2615666 29961938 7633357 22317573 31811902 31982722 14298221 22317573 31982819 43089781 7653813 44154683 8732322 43089781 44154769 24227311 43800700 13711475 40906680 24227311 13711539 48061947 30109196 7660402 43993467 48061947 7660488 29580639 5292950 38140285 5292950 21293538 38140356 17646232 47737600 32058831 47737600 42934248 32058836 13262640 23462343 1617194 23462343 1901587 1617259 5150775 7046596 44270140 7046596 22819218 44270181 17749796 34924638 32171251 8386063 17749796 32171346 30095973 12202864 38257881 12202864 42679593 38257912 10353022 40646034 26158412 40646034 36237182 26158412 8416485 16245525 32223010 16245525 32420032 32223081 20420340 1371966 7893319 1371966 2031617 7893335 2864137 20279212 26199008 29145409 2864137 26199080 29141766 19729396 44433106 44115780 29141766 44433141 6513924 34515379 32283579 12686666 6513924 32283636 20116056 49736865 44464394 49736865 47918939 44464416 38212450 3465543 32302772 3465543 39217131 32302873 12019664 37367876 44485630 3639658 12019664 44485639 18053021 1279896 7973955 2220749 18053021 7974031 19701732 12984505 1857435 24625926 19701732 1857528 9876789 34881917 26285125 27687743 9876789 26285134 5696632 6064263 44534580 34888313 5696632 44534629 14865531 46418593 38457138 5929897 14865531 38457191 44378135 4051962 38485208 4051962 10804515 38485308 11865822 21793388 14142622 7760360 11865822 14142649 32333570 24478420 44702533 24478420 23749609 44702588 29098286 25015092 44723985 32171647 29098286 44723985 20522503 20522503 2127735 20522503 20522503 2127735 22597975 20938239 8260902 20938239 48618802 8260905 8310032 34659671 2153994 34659671 25406149 2154075 49085033 5708432 26644257 5708432 32265692 26644305 18751513 18226037 32726402 18226037 33885794 32726424 45877488 23211339 20566948 23211339 26209405 20567002 48554034 25770643 38853402 9683274 48554034 38853467 9770420 14556349 2309265 27255587 9770420 2309324 32926392 16744099 44954824 24840989 32926392 44954840 29066838 49434549 26755357 49434549 12635292 26755407 21927714 32352409 20626921 32352409 15895076 20626932 7422009 23559357 14550898 32743911 7422009 14550982 38816601 5850890 26851026 5850890 32996623 26851107 42148171 47021378 26872907 47021378 32628418 26872908 9850929 10501741 32998960 10501741 24899993 32999043 27491904 4393602 33033499 4393602 17712085 33033570 37978226 42226216 39114479 42226216 2511412 39114525 42859989 49908919 45241083 48131208 42859989 45241088 39753103 30674979 14807321 30674979 45637890 14807371 30154199 11988643 2641926 11988643 11241926 2641976 7191871 13518594 45370275 13518594 45354921 45370344 54745 19711137 8871851 24814115 54745 8871937 38770495 34574748 2756244 41962321 38770495 2756337 26229406 39306415 21057327 10735951 26229406 21057347 46704290 11506122 39359422 18181795 46704290 39359481 38796645 28410469 45452212 28410469 13478996 45452222 412456 27727741 39466147 27727741 19639136 39466226 24470627 13030982 21266756 13030982 21713410 21266825 6058593 23139172 27435254 19236012 6058593 27435303 14457750 39190113 39701131 30253141 14457750 39701227 26898421 39016446 45812750 40952330 26898421 45812829 18647206 27663400 45817956 27663400 21728474 45817989 5559358 41319001 33664547 41319001 37210583 33664636 29066692 30653068 39759813 30653068 38963132 39759856 12086617 49971187 3232640 49971187 32302154 3232649 12008399 13656671 3239395 43088998 12008399 3239439 10061612 38594475 39804389 38594475 6327106 39804405 16703379 21150436 39851057 21150436 34093320 39851136 1035486 4199407 3314170 26974438 1035486 3314196 21869320 14532221 33851404 15208937 21869320 33851473 38840190 4742355 3402401 4742355 46055869 3402462 34432016 8734566 39966972 27614117 34432016 39967002 9988172 49209666 46063078 49209666 29374155 46063087 3208946 47030309 21722002 47030309 39809983 21722030 10928661 46423741 3496854 46423741 29486710 3496862 42464855 22978174 46154827 22978174 3814497 46154901 47090840 16768393 46169667 39523858 47090840 46169714 28186104 11618234 34024001 11618234 33711158 34024019 45471813 37332848 3585557 37332848 4607526 3585600 14885742 38990612 15863749 38990612 3710491 15863779 42391514 33643913 22005928 33643913 32254640 22006022 4299590 19482026 34202327 19482026 35838894 34202406 24298776 16276160 3858885 16276160 3198758 3858958 29322567 12536696 40433239 12536696 26083938 40433317 16080151 9648322 22221443 9648322 43846385 22221458 999302 19218350 10078183 10296062 999302 10078189 40544377 34492433 34463953 19908418 40544377 34463993 10765321 45143043 34542584 39154522 10765321 34542646 48642526 31097951 4104790 2940654 48642526 4104887 26972730 47422139 46846889 39228577 26972730 46846901 13788696 11503551 34728076 11503551 9151627 34728130 8676030 30463644 10406398 15204754 8676030 10406405 42984277 41087708 34805119 48741576 42984277 34805143 29634598 2151247 22699609 12264074 29634598 22699614 47525963 48470003 16667878 48470003 4566846 16667953 9725907 43325112 4498307 26465445 9725907 4498368 306967 11708860 10633595 11708860 31017081 10633669 39420965 46595640 41089015 46595640 41260374 41089048 29232745 39705052 16754836 4739295 29232745 16754840 35246405 42811088 41273637 48986699 35246405 41273719 2398239 36985098 35181790 36985098 7460784 35181841 18955749 23678549 35221035 47264406 18955749 35221129 18105816 26003002 17044057 26003002 17467477 17044087 14430126 46039962 47492180 46039962 29118827 47492275 30329324 40926812 41425850 43304610 30329324 41425912 34966996 36567528 17095113 3967517 34966996 17095144 42829171 42530474 23209891 25923738 42829171 23209967 28187681 26297990 35474412 48986691 28187681 35474475 5707126 41598794 17298139 40466899 5707126 17298188 28838696 30725820 5142797 30725820 35360418 5142798 44642019 42570370 17339657 42570370 19022469 17339727 42193681 8389736 17386517 48906013 42193681 17386586 42303185 30337820 41795129 30337820 42473956 41795170 30935782 8441903 17515229 41549758 30935782 17515275 41239019 10011768 23619001 10011768 25386353 23619062 494288 13341166 29815779 49113152 494288 29815876 7106674 26227442 29833029 47459682 7106674 29833047 17246497 35389391 17628365 35389391 34005133 17628371 23347674 48243185 17792799 48243185 22907892 17792836 21852744 1662414 36088704 8040124 21852744 36088775 32384657 27122374 36100767 24980361 32384657 36100782 31016207 26300043 42222489 26300043 36869529 42222544 17178756 44315094 42223989 44315094 11222466 42224042 34139317 39164101 36197907 39164101 27563542 36197947 31638631 22215137 17999735 22215137 10771707 17999769 30257199 32883043 24127009 32883043 179099 24127047 47774058 17451960 30283073 44583527 47774058 30283162 13816647 12695130 24145102 12695130 42284941 24145188 42749234 20004242 5893793 20004242 38129713 5893819 22210359 22178109 18109989 22178109 112961 18110049 42509645 28599506 42508465 28599506 3722411 42508513 34412629 22547405 48610262 22547405 16664124 48610296 2330283 32267749 24256113 35915758 2330283 24256157 44560231 49353986 12101694 6471293 44560231 12101780 23289721 8186827 30407293 10624448 23289721 30407389 12329357 35765163 30560085 4511908 12329357 30560158 31332240 39704929 12269193 39704929 47770487 12269249 22286152 22082044 36734758 22082044 25076919 36734833 47381309 9459604 36735886 9459604 31071680 36735890 43832763 45342283 30707519 45342283 26992816 30707602 2883029 18642608 42989696 14697025 2883029 42989793 15149987 40746227 24700535 40746227 34776566 24700549 2387554 49015265 43057085 49015265 21103141 43057139 23057202 13308993 30982514 34596334 23057202 30982553 44598498 31714790 43285828 18170064 44598498 43285841 38273701 11976319 31179763 15344094 38273701 31179764 3651338 27427037 37188945 12876654 3651338 37189007 10081580 3418061 37221143 3418061 38353019 37221143 172544 18699860 37295343 824744 172544 37295372 13914 8890169 37303853 8890169 14008003 37303898 18716557 29456130 49605004 29456130 16390535 49605083 15398102 22446674 43711290 22446674 38760679 43711383 

I'm sure this would be an order of magnitude faster in C, but I probably won't take the time to do so.

Source Link
skibrianski
  • 1.3k
  • 7
  • 10

perl

First, we use sort -n -k3 to get the most important field in order. Then, since perl is greatly hampered by the fact that a simple scalar takes on the order of 80 bytes, we slurp the output in to a 50 million * 12 byte array (12 bytes per line, each line contains 3 integers that can be represented as a 32 bit integer). Then we fire off 8 threads covering (roughly) 1/8ths of the data each (+ some overlap).

#!perl use strict; use warnings; # find lines s.t. $lines[$M]->{a} == $lines[$N]->{b} and # 0 <= $lines[$M]->{t} - $lines[$N]->{t} < 100 # OR $lines[$M]->{b} == $lines[$N]->{a} and # 0 <= $lines[$N]->{t} - $lines[$M]->{t} < 100 my $infile = shift; open(my $fh, "sort -n -k3 $infile |") || die "open sort pipe: $@"; my @lines; my $bytes_per_int = 4; my $bytes_per_line = $bytes_per_int * 3; my $nlines = 50_000_000; my $buf = "\0" x ($nlines * $bytes_per_line); my $ln = 0; my $nprocs = 8; my $last_group_start = 0; my $this_group_start; my $group = $nlines / $nprocs; my @pids; # TODO: uncollapse scalars until we get to just under 8GB. maybe each # entry as a scalar would work? while(<$fh>) { my ($A, $B, $T) = split/\s+/; substr($buf, $ln * $bytes_per_line, $bytes_per_line, pack "L3", ($A, $B, $T)); if( defined $this_group_start ) { if( $T - $last_group_start >= $group + 100 ) { if(my $pid = fork()) { push @pids, $pid; $last_group_start = $this_group_start; undef $this_group_start; } else { #warn "checking $last_group_start - $ln...\n"; for(my $l=$last_group_start; $l<=$ln; ++$l) { my $lpos = $l * $bytes_per_line; my ($A, $B, $T) = unpack "L3", substr($buf, $lpos, $bytes_per_line); my ($lA, $lB); my $lT = $T; for(my $lb=$l; $lb>=$last_group_start && $T - $lT <= 100; $lb--, $lpos -= $bytes_per_line) { ($lA, $lB, $lT) = unpack "L3", substr($buf, $lpos, $bytes_per_line); if($A == $lB || $B == $lA) { #print "($last_group_start) $A $B $T matches $lA $lB $lT\n"; print "$lA $lB $lT\n$A $B $T\n"; } } } exit; } } } elsif( !defined $this_group_start && $T - $last_group_start >= $group ) { #warn "this_group_start line: $ln, T: $T, last_group_start: $last_group_start"; $this_group_start = $ln; } $ln++; } waitpid $_, 0 for @pids;