The Selenium API doesn't provide a way to get a file downloaded on a remote machine.
It is still possible with Selenium alone, but the technique depends on the browser.
With Chrome, the downloaded files can be listed by navigating to chrome://downloads/ and retrieved with an <input type="file"> element injected into the page:
import base64
import os
import time

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait


def get_downloaded_files(driver):
    """Return the paths of the downloads Chrome reports as complete.

    Navigates to chrome://downloads/ unless the driver is already there, then
    reads the items of the page's download manager. The result is an empty
    list while no download is in the 'COMPLETE' state, which lets this
    function be passed directly to ``WebDriverWait(...).until``.
    """
    if not driver.current_url.startswith("chrome://downloads"):
        driver.get("chrome://downloads/")

    return driver.execute_script(
        "return downloads.Manager.get().items_ "
        "  .filter(e => e.state === 'COMPLETE') "
        "  .map(e => e.filePath || e.file_path); "
    )


def get_file_content(driver, path):
    """Return the bytes of the file at ``path`` on the browser's machine.

    A hidden ``<input type="file">`` is injected into the current page, the
    path is sent to it, and the selected file is read in the page with a
    ``FileReader`` as a base64 data URL, which is decoded locally.

    Raises:
        Exception: if the page did not hand back a data URL (the message
            carries whatever the page reported instead).
    """
    elem = driver.execute_script(
        "var input = window.document.createElement('INPUT'); "
        "input.setAttribute('type', 'file'); "
        "input.hidden = true; "
        "input.onchange = function (e) { e.stopPropagation() }; "
        "return window.document.documentElement.appendChild(input); "
    )

    # NOTE(review): the raw command is sent through the private _execute
    # instead of elem.send_keys(); presumably to keep the client from
    # treating `path` as a local file to upload -- confirm before changing.
    elem._execute('sendKeysToElement', {'value': [path], 'text': path})

    result = driver.execute_async_script(
        "var input = arguments[0], callback = arguments[1]; "
        "var reader = new FileReader(); "
        "reader.onload = function (ev) { callback(reader.result) }; "
        # The error event itself carries no message; the failure is exposed
        # on reader.error, so report that instead of `undefined`.
        "reader.onerror = function (ex) { callback(String(reader.error)) }; "
        "reader.readAsDataURL(input.files[0]); "
        "input.remove(); ",
        elem)

    # `result` may be None/empty if the page reported nothing useful.
    if not (result and result.startswith('data:')):
        raise Exception("Failed to get file content: %s" % result)

    # Everything after the 'base64,' marker is the payload; an empty payload
    # (e.g. an empty file) decodes to b''.
    _, _, payload = result.partition('base64,')
    return base64.b64decode(payload)


capabilities_chrome = {
    'browserName': 'chrome',
    # 'proxy': {
    #     'proxyType': 'manual',
    #     'sslProxy': '50.59.162.78:8088',
    #     'httpProxy': '50.59.162.78:8088'
    # },
    'goog:chromeOptions': {
        'args': [
        ],
        'prefs': {
            # 'download.default_directory': "",
            # 'download.directory_upgrade': True,
            'download.prompt_for_download': False,
            'plugins.always_open_pdf_externally': True,
            'safebrowsing_for_trusted_sources_enabled': False
        }
    }
}

driver = webdriver.Chrome(desired_capabilities=capabilities_chrome)
#driver = webdriver.Remote('http://127.0.0.1:5555/wd/hub', capabilities_chrome)

# download a pdf file
driver.get("https://www.mozilla.org/en-US/foundation/documents")
driver.find_element_by_css_selector("[href$='.pdf']").click()

# list all the completed remote files (waits for at least one)
files = WebDriverWait(driver, 20, 1).until(get_downloaded_files)

# get the content of the first file remotely
content = get_file_content(driver, files[0])

# save the content in a local file in the working directory
with open(os.path.basename(files[0]), 'wb') as f:
    f.write(content)
With Firefox, the files can be listed and retrieved directly by switching to the browser's chrome context and calling the browser API from a script:
import base64
import os
import time
from contextlib import contextmanager

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait


@contextmanager
def _chrome_context(driver):
    """Run the enclosed commands in Firefox's "chrome" context.

    Registers the SET_CONTEXT command on the driver, switches to the
    "chrome" context, and always switches back to "content" on exit, even
    when the enclosed code raises or returns early.
    """
    driver.command_executor._commands["SET_CONTEXT"] = (
        "POST", "/session/$sessionId/moz/context")
    driver.execute("SET_CONTEXT", {"context": "chrome"})
    try:
        yield
    finally:
        driver.execute("SET_CONTEXT", {"context": "content"})


def get_file_names_moz(driver):
    """Return the target paths of the downloads Firefox reports as succeeded.

    The list is empty while no download has succeeded, which lets this
    function be passed directly to ``WebDriverWait(...).until``.
    """
    with _chrome_context(driver):
        return driver.execute_async_script("""
            var { Downloads } = Components.utils.import('resource://gre/modules/Downloads.jsm', {});
            Downloads.getList(Downloads.ALL)
              .then(list => list.getAll())
              .then(entries => entries.filter(e => e.succeeded).map(e => e.target.path))
              .then(arguments[0]);
            """)


def get_file_content_moz(driver, path):
    """Return the bytes of the file at ``path`` on the browser's machine.

    The file is read and base64-encoded by a script running in the "chrome"
    context, then decoded locally.

    Raises:
        Exception: if the browser-side read or encoding fails (the message
            carries the error reported by the script).
    """
    with _chrome_context(driver):
        result = driver.execute_async_script("""
            var callback = arguments[1];
            var { OS } = Cu.import("resource://gre/modules/osfile.jsm", {});
            OS.File.read(arguments[0]).then(function(data) {
              var base64 = Cc["@mozilla.org/scriptablebase64encoder;1"].getService(Ci.nsIScriptableBase64Encoder);
              var stream = Cc['@mozilla.org/io/arraybuffer-input-stream;1'].createInstance(Ci.nsIArrayBufferInputStream);
              stream.setData(data.buffer, 0, data.length);
              return base64.encodeToString(stream, data.length);
            }).then(callback, function(err) { callback({error: String(err)}); });
            """, path)

    # A failure is reported as an object rather than a base64 string, so it
    # surfaces here instead of as a script timeout.
    if isinstance(result, dict):
        raise Exception("Failed to get file content: %s" % result.get('error'))
    return base64.b64decode(result)


capabilities_moz = {
    'browserName': 'firefox',
    'marionette': True,
    'acceptInsecureCerts': True,
    'moz:firefoxOptions': {
        'args': [],
        'prefs': {
            # 'network.proxy.type': 1,
            # 'network.proxy.http': '12.157.129.35', 'network.proxy.http_port': 8080,
            # 'network.proxy.ssl': '12.157.129.35', 'network.proxy.ssl_port': 8080,
            'browser.download.dir': '',
            'browser.helperApps.neverAsk.saveToDisk': 'application/octet-stream,application/pdf',
            'browser.download.useDownloadDir': True,
            'browser.download.manager.showWhenStarting': False,
            'browser.download.animateNotifications': False,
            'browser.safebrowsing.downloads.enabled': False,
            'browser.download.folderList': 2,
            'pdfjs.disabled': True
        }
    }
}

# launch Firefox
# driver = webdriver.Firefox(capabilities=capabilities_moz)
driver = webdriver.Remote('http://127.0.0.1:5555/wd/hub', capabilities_moz)

# download a pdf file
driver.get("https://www.mozilla.org/en-US/foundation/documents")
driver.find_element_by_css_selector("[href$='.pdf']").click()

# list all the downloaded files (waits for at least one)
files = WebDriverWait(driver, 20, 1).until(get_file_names_moz)

# get the content of the first listed file
content = get_file_content_moz(driver, files[0])

# save the content in a local file in the working directory
with open(os.path.basename(files[0]), 'wb') as f:
    f.write(content)
Relevant download-directory settings: browser.download.dir for the FF profile and download.default_directory for Chrome options.