Building on keredson's answer above, I updated the code, reran it under Python 3, and added a fourth option that uses a list comprehension:
import time


class Timer:
    """Context manager that prints how long its ``with`` body took.

    The start and end readings are stored on the instance as ``start`` and
    ``finish`` so the elapsed time can also be inspected after the block.
    """

    def __init__(self, desc):
        """Store *desc*, the label printed in front of the elapsed time."""
        self.desc = desc

    def __enter__(self):
        """Record the start reading and return the timer itself."""
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments.
        self.start = time.perf_counter()
        return self

    def __exit__(self, type, value, traceback):
        """Record the end reading and print ``<desc> took <seconds>``.

        Returns ``None``, so an exception raised inside the ``with`` body
        still propagates after the timing line has been printed.
        """
        self.finish = time.perf_counter()
        print(self.desc, 'took', self.finish - self.start)


def main(n=4000000):
    """Time four ways of building a set from ``2 * n`` items (half duplicates).

    Args:
        n: Number of distinct integers; the input list holds each one twice.
    """
    data = list(range(n))
    data = data + data
    print(f'{len(data):,} items to add')

    # Option 1: test membership before every add.
    with Timer('option 1'):
        myset = set()
        for x in data:
            if x not in myset:
                myset.add(x)
    print(f'{len(myset):,} items to added')

    # Option 2: add unconditionally and let the set drop duplicates.
    with Timer('option 2'):
        myset = set()
        for x in data:
            myset.add(x)
    print(f'{len(myset):,} items to added')

    # Option 3: build a list with an explicit append loop, then convert it.
    # The loop is deliberate: it is the construct being measured.
    with Timer('option 3'):
        mylist = []
        for x in data:
            mylist.append(x)
        myset = set(mylist)
    print(f'{len(myset):,} items to added')

    # Option 4: build the intermediate list with a list comprehension.
    # The list comprehension is deliberate: it is the construct being measured.
    with Timer('option 4'):
        myset = set([x for x in data])
    print(f'{len(myset):,} items to added')


if __name__ == '__main__':
    main()
where the output was:
8,000,000 items to add
option 1 took 0.771376371383667
4,000,000 items to added
option 2 took 0.6729307174682617
4,000,000 items to added
option 3 took 0.6674449443817139
4,000,000 items to added
option 4 took 0.3772563934326172
4,000,000 items to added
As you can see, the list comprehension method (option 4) runs almost twice as fast as the other three.
timeit module...