I wrote two Python scripts.
The first one extracts metadata only (full path, mtime, size, mode, etc.) and saves it in an SQLite database. The second Python script then retrieves the files from these paths into a recovery folder, keeping the full path (hierarchy) they had under lost+found. It takes weird characters, spaces, quotes and similar things into consideration. However, be cautious, and always check the result first with a few of the files that have the weirdest names.
First Python script (metadata extraction):
"""Index everything below /lost+found of an ext4 image into an SQLite table.

Only metadata is collected (path, inode, mode, owner, size, mtime); no file
contents are read.  The tree is walked breadth-first by running
``debugfs -R 'ls -l ...'`` once per directory.
"""
import sqlite3
import stat
import subprocess
from collections import deque, namedtuple

IMG = "/run/media/mydisk/ext4.img"
DB = "ext4lostfound_db.db"
ROOT = "/lost+found"

CREATE_TABLE_SQL = """
CREATE TABLE paths(
    path TEXT,
    type_0file_1dir INTEGER,
    F_INODE INTEGER,
    F_MODE TEXT,
    F_LINKS INTEGER,
    F_UID INTEGER,
    F_GID INTEGER,
    F_SIZE INTEGER,
    F_MTIME TEXT,
    f_checked INTEGER DEFAULT 0,
    deep_dir INTEGER DEFAULT 0
)
"""

INSERT_SQL = (
    "INSERT INTO paths(path,type_0file_1dir,F_INODE,F_MODE,F_LINKS,"
    "F_UID,F_GID,F_SIZE,F_MTIME,f_checked,deep_dir) "
    "VALUES(?,?,?,?,?,?,?,?,?,?,?)"
)

# One parsed row of `debugfs ls -l` output.  `kind` is "DIR", "FILE" or "OTHER".
LsEntry = namedtuple("LsEntry", "inode mode links uid gid size mtime name kind")


def parse_ls_line(line):
    """Parse one line of ``debugfs ls -l`` output.

    Assumed layout (whitespace separated):
    ``inode mode (n) uid gid size date time name``.

    Returns an ``LsEntry``, or ``None`` when the line is blank, too short,
    has non-numeric fields, or refers to inode 0.
    """
    # maxsplit=8 keeps the name (9th field) intact, including any runs of
    # several spaces or trailing spaces inside it.
    parts = line.split(None, 8)
    if len(parts) < 9:
        return None
    try:
        inode = int(parts[0])
        mode_bits = int(parts[1], 8)  # debugfs prints the mode in octal
        uid = int(parts[3])
        gid = int(parts[4])
        size = int(parts[5])
    except ValueError:
        return None
    if inode == 0:
        return None

    # Decide on the real file-type bits: looking only at the first octal
    # digit would also treat symlinks (120777), sockets (140xxx) and FIFOs
    # (10xxx) as regular files.
    if stat.S_ISDIR(mode_bits):
        kind = "DIR"
    elif stat.S_ISREG(mode_bits):
        kind = "FILE"
    else:
        kind = "OTHER"

    return LsEntry(
        inode=inode,
        mode=parts[1],
        # Parenthesised third column, stored in F_LINKS.
        # NOTE(review): confirm this really is a link count.
        links=parts[2].strip("()"),
        uid=uid,
        gid=gid,
        size=size,
        mtime=f"{parts[6]} {parts[7]}",
        name=parts[8],
        kind=kind,
    )


def list_directory(filespec):
    """Run ``debugfs ls -l`` on *filespec* and return its output lines.

    *filespec* is either a plain path or ``<inode>``.  Returns ``None`` when
    debugfs exits with a non-zero status.
    """
    try:
        out = subprocess.check_output(
            ["debugfs", "-R", f"ls -l {filespec}", IMG],
            stderr=subprocess.DEVNULL,
            text=True,
            encoding="utf-8",
            # Bytes that are not valid UTF-8 become \xNN instead of
            # aborting the whole scan with UnicodeDecodeError.
            errors="backslashreplace",
        )
    except subprocess.CalledProcessError:
        return None

    lines = out.splitlines()
    # Remove debugfs banner if present
    if lines and lines[0].startswith("debugfs"):
        lines = lines[1:]
    return lines


def mark_failed(cur, path, depth):
    """Flag *path* as unreadable (f_checked = -1), inserting a row if needed."""
    cur.execute("UPDATE paths SET f_checked=-1 WHERE path=?", (path,))
    if cur.rowcount == 0:
        cur.execute(
            "INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir) "
            "VALUES(?,?,?,?)",
            (path, 1, -1, depth),
        )


def main():
    """Rebuild the ``paths`` table from a breadth-first walk of ROOT."""
    conn = sqlite3.connect(DB)
    try:
        cur = conn.cursor()
        cur.execute("DROP TABLE IF EXISTS paths")
        cur.execute(CREATE_TABLE_SQL)
        conn.commit()

        # Queue items: (path stored in the DB, debugfs filespec, depth).
        # Sub-directories are listed by inode (`<123>`), so names with
        # quotes, spaces or escaped bytes never have to be passed back
        # to debugfs.
        queue = deque([(ROOT, ROOT, 1)])
        queued_dir_inodes = set()

        while queue:
            curr_path, filespec, depth = queue.popleft()
            print(f"Scanning: {curr_path}, depth={depth}")

            lines = list_directory(filespec)
            entries = [
                entry
                for entry in map(parse_ls_line, lines or [])
                if entry is not None
            ]
            # A readable directory lists at least '.' and '..'; nothing at
            # all means debugfs could not read it, even if it exited with 0.
            if not entries:
                print(f"Failed to read {curr_path}, marking as error")
                mark_failed(cur, curr_path, depth)
                conn.commit()
                continue

            for entry in entries:
                # Skip '.' and '..' to avoid cycles
                if entry.name in (".", ".."):
                    continue

                full_path = f"{curr_path}/{entry.name}"
                print(f"  -> {entry.kind}: {full_path}")

                if entry.kind == "FILE":
                    cur.execute(
                        INSERT_SQL,
                        (full_path, 0, entry.inode, entry.mode, entry.links,
                         entry.uid, entry.gid, entry.size, entry.mtime,
                         1, depth + 1),
                    )
                elif entry.kind == "DIR":
                    cur.execute(
                        INSERT_SQL,
                        (full_path, 1, entry.inode, entry.mode, entry.links,
                         entry.uid, entry.gid, entry.size, entry.mtime,
                         0, depth + 1),
                    )
                    # On a damaged filesystem the same directory inode may
                    # be reachable twice; scan it only once so the walk
                    # always terminates.
                    if entry.inode not in queued_dir_inodes:
                        queued_dir_inodes.add(entry.inode)
                        queue.append(
                            (full_path, f"<{entry.inode}>", depth + 1)
                        )

            # Mark current dir as checked
            cur.execute(
                "UPDATE paths SET f_checked=1 WHERE path=?", (curr_path,)
            )
            conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    main()
Second Python script (file retrieval):
#!/usr/bin/env python3
"""Dump every file recorded in the SQLite index out of the ext4 image.

Each row of ``paths`` with ``type_0file_1dir = 0`` is written to OUTDIR under
the same relative path it had below /lost+found, using
``debugfs -R 'dump <inode> "target"'``.
"""
import sqlite3
import subprocess
import os
import shlex
import unicodedata
import re

IMG = "/run/media/mydisk/ext4.img"          # EXT4 image
DB = "ext4lostfound_db.db"                  # SQLite DB with paths and inodes
OUTDIR = "/mnt/tmp_drive/recover"           # destination folder
LOGFILE = "/mnt/tmp_drive/skipped_files.log"
FAILED_LOG = "/mnt/tmp_drive/failed_files.log"

LOST_FOUND_PREFIX = "/lost+found/"
# Literal "\xNN" byte escapes, e.g. \xef\xbc\x82
HEX_ESCAPE_RE = re.compile(rb"\\x([0-9A-Fa-f]{2})")


def normalize_fullwidth(s):
    """Convert fullwidth Unicode characters to ASCII equivalents."""
    return unicodedata.normalize('NFKC', s)


def sanitize_path(path):
    """Normalize fullwidth characters in each component of the path.

    A component is left untouched when normalizing it would change the
    directory structure: NFKC can produce ``/`` (from U+FF0F) or turn a name
    into ``.`` / ``..`` (from U+FF0E), which would move the file somewhere
    else or even outside the recovery folder.
    """
    cleaned = []
    for component in path.split(os.sep):
        normalized = normalize_fullwidth(component)
        alters_structure = (
            os.sep in normalized
            or (normalized in ("", ".", "..") and normalized != component)
        )
        cleaned.append(component if alters_structure else normalized)
    return os.sep.join(cleaned)


def decode_escaped_path(file_path):
    """Turn literal ``\\xNN`` sequences into the bytes they stand for.

    Example: the text ``\\xef\\xbc\\x82`` becomes the single character
    U+FF02.  Only ``\\xNN`` is interpreted; other backslash sequences
    (``\\b``, ``\\n``, a trailing backslash, ...) are kept literally instead
    of being rewritten or raising.  Bytes that do not form valid UTF-8 are
    replaced with U+FFFD.
    """
    raw = file_path.encode('utf-8')
    raw = HEX_ESCAPE_RE.sub(lambda m: bytes([int(m.group(1), 16)]), raw)
    return raw.decode('utf-8', errors='replace')


def strip_lost_found_prefix(path):
    """Return *path* relative to /lost+found.

    ``str.lstrip("/lost+found/")`` removes any leading run of the characters
    ``/ l o s t + f u n d`` and would eat the start of names such as
    ``documents``; only the exact prefix is removed here.
    """
    if path.startswith(LOST_FOUND_PREFIX):
        return path[len(LOST_FOUND_PREFIX):]
    return path.lstrip("/")


def main():
    """Recover all indexed files, logging skipped and failed targets."""
    os.makedirs(OUTDIR, exist_ok=True)

    # Query all files
    conn = sqlite3.connect(DB)
    try:
        files = conn.execute(
            "SELECT path, F_INODE FROM paths WHERE type_0file_1dir=0"
        ).fetchall()
    finally:
        conn.close()

    # Open log files in append mode
    with open(LOGFILE, "a", encoding="utf-8") as log_skip, \
            open(FAILED_LOG, "a", encoding="utf-8") as log_fail:
        for file_path, inode in files:
            decoded = decode_escaped_path(file_path)
            rel_path = sanitize_path(strip_lost_found_prefix(decoded))
            local_path = os.path.join(OUTDIR, rel_path)

            try:
                os.makedirs(os.path.dirname(local_path), exist_ok=True)
            except OSError as exc:
                # e.g. a regular file already occupies a parent directory
                # name; record it and keep going with the remaining files.
                print(f"Failed to create directory for {local_path}: {exc}")
                log_fail.write(f"{local_path}\n")
                continue

            # Skip if already exists
            if os.path.exists(local_path):
                print(f"Skipping already existing file: {local_path}")
                log_skip.write(f"{local_path}\n")
                continue

            # Double quotes for debugfs: inside a quoted argument a doubled
            # quote stands for one literal quote character.
            safe_local_path = local_path.replace('"', '""')

            # Build and run debugfs command
            cmd = ['debugfs', '-R', f'dump <{inode}> "{safe_local_path}"', IMG]
            print('Recovering inode', inode, '->', local_path)
            print('Command:', ' '.join(cmd))

            try:
                subprocess.run(cmd, check=True)
            except subprocess.CalledProcessError:
                print(f"Failed to dump inode {inode} -> {local_path}")
                log_fail.write(f"{local_path}\n")
                continue

            # --- Post-write check ---
            if not os.path.exists(local_path) or os.path.getsize(local_path) == 0:
                print(f"Failed: file not created or empty -> {local_path}")
                log_fail.write(f"{local_path}\n")

    print("Recovery finished.")


if __name__ == "__main__":
    main()