1

How can I use debugfs on Linux to dump the lost+found contents of an unmountable ext4 image file while keeping the folder hierarchy?

My ext4 disk was failing, so I made a disk image of it. I tried e2fsck and other tools to fix the image, but e2fsck cannot repair it and the image still refuses to mount.

However, I can see with debugfs that there is a lot of content inside the lost+found folder; almost all of the data is there. Unfortunately, debugfs does not provide a tool to extract all of this data automatically while keeping any directory hierarchy that may exist. And yes, there is a folder hierarchy inside lost+found, e.g. /lost+found/#1233131/folderA/folderB/file.mp4.

So I am looking for a way to restore all of this data automatically.

1 Answer 1

0

I made two Python scripts.

The first script extracts only the metadata (full path, mtime, size, mode, etc.) and saves it in an SQLite database. The second script then retrieves the files at these paths into a recovery folder, keeping the full path (hierarchy) they had under lost+found. It takes weird characters, spaces, quotes and similar things into account. However, be cautious and always check the result first with a few of the files that have the weirdest names.

First script (build the index)

"""Index everything below /lost+found of an ext4 image into a SQLite database.

The image is never mounted: every directory is listed with
``debugfs -R "ls -l ..."`` and each regular file / directory found is stored
as one row of the ``paths`` table, walking the tree breadth-first.
"""
import sqlite3
import stat
import subprocess
from collections import deque
from contextlib import closing
from typing import NamedTuple, Optional

IMG = "/run/media/mydisk/ext4.img"
DB = "ext4lostfound_db.db"
ROOT = "/lost+found"

CREATE_TABLE_SQL = """
CREATE TABLE paths(
    path TEXT,
    type_0file_1dir INTEGER,
    F_INODE INTEGER,
    F_MODE TEXT,
    F_LINKS INTEGER,
    F_UID INTEGER,
    F_GID INTEGER,
    F_SIZE INTEGER,
    F_MTIME TEXT,
    f_checked INTEGER DEFAULT 0,
    deep_dir INTEGER DEFAULT 0
)
"""

INSERT_ENTRY_SQL = (
    "INSERT INTO paths(path,type_0file_1dir,F_INODE,F_MODE,F_LINKS,"
    "F_UID,F_GID,F_SIZE,F_MTIME,f_checked,deep_dir) "
    "VALUES(?,?,?,?,?,?,?,?,?,?,?)"
)


class Entry(NamedTuple):
    """One parsed line of ``debugfs ls -l`` output."""

    inode: int
    mode: str   # octal text exactly as printed, e.g. "100644" or "40755"
    # NOTE(review): the parenthesised field is stored in F_LINKS, but it
    # looks like debugfs prints the directory entry's file-type code there
    # rather than a link count -- confirm against your debugfs version.
    links: str
    uid: str
    gid: str
    size: str
    mtime: str  # "<date> <time>" exactly as printed
    name: str
    kind: str   # "FILE", "DIR" or "OTHER"


def parse_ls_line(line: str) -> Optional[Entry]:
    """Parse one ``ls -l`` output line; return None if it is not an entry.

    Expected field order: inode, mode, (n), uid, gid, size, date, time, name.
    """
    # maxsplit=8 keeps the name as a single field, so runs of spaces inside
    # a file name are preserved instead of being collapsed to one space.
    fields = line.split(None, 8)
    if len(fields) < 9:
        return None
    inode_txt, mode, links, uid, gid, size, date, time, name = fields
    try:
        inode = int(inode_txt.strip("<>"))
        mode_bits = int(mode, 8)
    except ValueError:
        return None

    # Classify by the real file-type bits.  Looking only at the first octal
    # digit cannot tell regular files (100xxx) from symlinks (120xxx),
    # sockets (140xxx) or FIFOs (10xxx).
    if stat.S_ISDIR(mode_bits):
        kind = "DIR"
    elif stat.S_ISREG(mode_bits):
        kind = "FILE"
    else:
        kind = "OTHER"
    return Entry(inode, mode, links.strip("()"), uid, gid, size,
                 f"{date} {time}", name, kind)


def build_ls_request(path: str, inode: Optional[int]) -> str:
    """Return the debugfs request that lists one directory.

    When the inode is known the ``<inode>`` filespec is used, which avoids
    every quoting/escaping problem of odd directory names.  Otherwise the
    path is quoted, with embedded double quotes doubled.
    """
    if inode:
        return f"ls -l <{inode}>"
    quoted = path.replace('"', '""')
    return f'ls -l "{quoted}"'


def scan(conn: sqlite3.Connection) -> None:
    """Walk the tree below ROOT breadth-first and fill the ``paths`` table."""
    cur = conn.cursor()
    queue = deque([(ROOT, None, 1)])  # (path, inode or None, depth)
    seen_dirs = set()  # directory inodes already queued (cycle guard)

    while queue:
        curr_path, curr_inode, depth = queue.popleft()
        print(f"Scanning: {curr_path}, depth={depth}")

        try:
            out = subprocess.check_output(
                ['debugfs', '-R', build_ls_request(curr_path, curr_inode), IMG],
                stderr=subprocess.DEVNULL,
                text=True,
                encoding='utf-8',
            )
        except subprocess.CalledProcessError:
            print(f"Failed to read {curr_path}, marking as error")
            cur.execute(
                "INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir) VALUES(?,?,?,?)",
                (curr_path, 1, -1, depth),
            )
            conn.commit()
            continue

        # Remove debugfs banner if present
        lines = out.splitlines()
        if lines and lines[0].startswith("debugfs"):
            lines = lines[1:]

        for line in lines:
            entry = parse_ls_line(line)
            # Skip unparsable lines, and '.' / '..' to avoid cycles
            if entry is None or entry.name in ('.', '..'):
                continue

            full_path = f"{curr_path}/{entry.name}"
            print(f"  -> {entry.kind}: {full_path}")

            if entry.kind == "FILE":
                cur.execute(INSERT_ENTRY_SQL, (
                    full_path, 0, entry.inode, entry.mode, entry.links,
                    entry.uid, entry.gid, entry.size, entry.mtime,
                    1, depth + 1,
                ))
            elif entry.kind == "DIR":
                cur.execute(INSERT_ENTRY_SQL, (
                    full_path, 1, entry.inode, entry.mode, entry.links,
                    entry.uid, entry.gid, entry.size, entry.mtime,
                    0, depth + 1,
                ))
                # On a damaged filesystem the same directory inode may be
                # reachable more than once; descend into it only once.  A
                # repeated directory keeps f_checked=0 as a visible marker.
                if entry.inode not in seen_dirs:
                    seen_dirs.add(entry.inode)
                    queue.append((full_path, entry.inode, depth + 1))

        # Mark current dir as checked
        cur.execute("UPDATE paths SET f_checked=1 WHERE path=?", (curr_path,))
        conn.commit()


def main() -> None:
    """Recreate the ``paths`` table and index the whole /lost+found tree."""
    # closing() guarantees the connection is released even if the scan dies.
    with closing(sqlite3.connect(DB)) as conn:
        conn.execute("DROP TABLE IF EXISTS paths")
        conn.execute(CREATE_TABLE_SQL)
        conn.commit()
        scan(conn)


if __name__ == "__main__":
    main()

Second script (retrieve the files)

#!/usr/bin/env python3
r"""Restore the files indexed in the SQLite database into a recovery folder.

Every row of ``paths`` that describes a file is dumped from the ext4 image
with ``debugfs -R "dump <inode> ..."`` to OUTDIR, recreating the directory
hierarchy the file had below /lost+found.
"""
import os
import re
import shlex
import sqlite3
import subprocess
import unicodedata
from contextlib import closing

IMG = "/run/media/mydisk/ext4.img"  # EXT4 image
DB = "ext4lostfound_db.db"  # SQLite DB with paths and inodes
OUTDIR = "/mnt/tmp_drive/recover"  # destination folder
LOGFILE = "/mnt/tmp_drive/skipped_files.log"
FAILED_LOG = "/mnt/tmp_drive/failed_files.log"

LOST_FOUND_PREFIX = "/lost+found/"
_HEX_ESCAPE = re.compile(rb"\\x([0-9A-Fa-f]{2})")


def normalize_fullwidth(s):
    """Convert fullwidth Unicode characters to ASCII equivalents (NFKC)."""
    return unicodedata.normalize('NFKC', s)


def sanitize_path(path):
    """Normalize fullwidth characters in each component of the path."""
    return os.sep.join(normalize_fullwidth(part) for part in path.split(os.sep))


def decode_escaped_path(file_path):
    r"""Turn literal ``\xNN`` sequences (e.g. ``\xef\xbc\x82``) into text.

    Only ``\xNN`` byte escapes are decoded; the resulting bytes are then read
    as UTF-8 (invalid sequences become U+FFFD).  Any other backslash sequence
    is left untouched, so a name that really contains ``\t`` or ``\n`` as two
    characters is not silently rewritten, and malformed escapes cannot raise.
    NOTE(review): this assumes the stored paths use ``\xNN`` for every escaped
    byte, including the backslash itself -- confirm for your debugfs version.
    """
    raw = _HEX_ESCAPE.sub(
        lambda m: bytes([int(m.group(1), 16)]),
        file_path.encode('utf-8'),
    )
    return raw.decode('utf-8', errors='replace')


def strip_lost_found(path):
    """Return *path* relative to /lost+found/.

    ``str.lstrip("/lost+found/")`` must not be used here: it strips any
    leading run of those *characters*, so "/lost+found/folder/x" would
    become "er/x".  Leading slashes are removed so that joining the result
    to OUTDIR can never yield an absolute path.
    """
    if path.startswith(LOST_FOUND_PREFIX):
        path = path[len(LOST_FOUND_PREFIX):]
    return path.lstrip("/")


def is_within(base, target):
    """Return True if *target* is *base* itself or lies below it."""
    base = os.path.abspath(base)
    target = os.path.abspath(target)
    return os.path.commonpath([base, target]) == base


def main():
    """Dump every indexed file into OUTDIR, logging skips and failures."""
    os.makedirs(OUTDIR, exist_ok=True)

    # closing() releases the DB connection even if the recovery is aborted;
    # both logs are opened in append mode so reruns keep earlier entries.
    with closing(sqlite3.connect(DB)) as conn, \
            open(LOGFILE, "a", encoding="utf-8") as log_skip, \
            open(FAILED_LOG, "a", encoding="utf-8") as log_fail:

        # Query all files
        rows = conn.execute(
            "SELECT path, F_INODE FROM paths WHERE type_0file_1dir=0"
        )
        for file_path, inode in rows:
            rel_path = sanitize_path(strip_lost_found(decode_escaped_path(file_path)))
            local_path = os.path.join(OUTDIR, rel_path)

            # NFKC can turn fullwidth dots/slashes into real ".." or "/"
            # components; never write outside the recovery folder.
            if not is_within(OUTDIR, local_path):
                print(f"Refusing path outside {OUTDIR}: {local_path}")
                log_fail.write(f"{local_path}\n")
                continue

            try:
                os.makedirs(os.path.dirname(local_path), exist_ok=True)
            except OSError as err:
                # One bad directory name must not abort the whole recovery.
                print(f"Failed to create directory for {local_path}: {err}")
                log_fail.write(f"{local_path}\n")
                continue

            # Skip if already exists
            if os.path.exists(local_path):
                print(f"Skipping already existing file: {local_path}")
                log_skip.write(f"{local_path}\n")
                continue

            # Double quotes for debugfs
            safe_local_path = local_path.replace('"', '""')

            # Build and run debugfs command
            cmd = ['debugfs', '-R', f'dump <{inode}> "{safe_local_path}"', IMG]
            print('Recovering inode', inode, '->', local_path)
            print('Command:', ' '.join(cmd))

            try:
                subprocess.run(cmd, check=True)
            except subprocess.CalledProcessError:
                print(f"Failed to dump inode {inode} -> {local_path}")
                log_fail.write(f"{local_path}\n")
                continue

            # --- Post-write check ---
            if not os.path.exists(local_path) or os.path.getsize(local_path) == 0:
                print(f"Failed: file not created or empty -> {local_path}")
                log_fail.write(f"{local_path}\n")

    print("Recovery finished.")


if __name__ == "__main__":
    main()

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.