made it smarter

This commit is contained in:
bnair
2026-01-03 14:20:32 +01:00
parent 4729c75e41
commit 51fc7e12bc
3 changed files with 228 additions and 47 deletions

View File

@@ -223,6 +223,11 @@ def process_file(filepath, log_category, lock_dir, log_dir, encoders, worker_id=
if shutdown_requested: return if shutdown_requested: return
# 0. Check if already processed in a previous run
if common.is_already_processed(log_dir, filepath):
update("Already Processed (Skipping)", "dim")
return
# 1. Lock Check (Shared Storage) # 1. Lock Check (Shared Storage)
lock_file = common.acquire_lock(lock_dir, filepath) lock_file = common.acquire_lock(lock_dir, filepath)
if not lock_file: if not lock_file:
@@ -402,6 +407,10 @@ def process_file(filepath, log_category, lock_dir, log_dir, encoders, worker_id=
"original_metadata": info, "original_metadata": info,
"encoded_metadata": final_info_verified or final_info "encoded_metadata": final_info_verified or final_info
}) })
# Mark as processed to prevent re-encoding in future runs
common.mark_processed(log_dir, filepath, chosen_codec, vmaf_score, final_savings)
update("Done", "green") update("Done", "green")
if status_cb: status_cb(worker_id, filename, f"STATS:SAVED:{saved_bytes}", "green") if status_cb: status_cb(worker_id, filename, f"STATS:SAVED:{saved_bytes}", "green")
else: else:
@@ -430,6 +439,8 @@ def main():
parser.add_argument("--skip-until", help="Skip all files alphabetically until this filename substring is found") parser.add_argument("--skip-until", help="Skip all files alphabetically until this filename substring is found")
parser.add_argument("--cpu-only", action="store_true", help="Force software encoding (CPU only)") parser.add_argument("--cpu-only", action="store_true", help="Force software encoding (CPU only)")
parser.add_argument("--temp-dir", help="Override local temp directory") parser.add_argument("--temp-dir", help="Override local temp directory")
parser.add_argument("--av1-encoder", choices=["hw", "sw", "off"], default="hw", help="AV1 encoder: hw (hardware), sw (software), off (disable)")
parser.add_argument("--hevc-encoder", choices=["hw", "sw", "off"], default="hw", help="HEVC encoder: hw (hardware), sw (software), off (disable)")
args = parser.parse_args() args = parser.parse_args()
if args.debug: if args.debug:
@@ -464,19 +475,43 @@ def main():
# 3. Scan & Queue # 3. Scan & Queue
tasks = [] tasks = []
# Skip-until filtering
skip_until = args.skip_until
skipping = bool(skip_until)
skipped_count = 0
tv_path = Path(args.tv_dir) tv_path = Path(args.tv_dir)
if tv_path.exists(): if tv_path.exists():
print(f"Scanning TV: {tv_path}") print(f"Scanning TV: {tv_path}")
files = list(tv_path.rglob("*.mkv")) + list(tv_path.rglob("*.mp4")) files = list(tv_path.rglob("*.mkv")) + list(tv_path.rglob("*.mp4"))
files.sort(key=lambda x: x.stat().st_size, reverse=True) files.sort(key=lambda x: x.stat().st_size, reverse=True)
for f in files: tasks.append((f, "tv_shows")) for f in files:
if skipping:
if skip_until.lower() in str(f).lower():
skipping = False
print(f" Found '{skip_until}' - resuming from here")
else:
skipped_count += 1
continue
tasks.append((f, "tv_shows"))
content_path = Path(args.content_dir) content_path = Path(args.content_dir)
if content_path.exists(): if content_path.exists():
print(f"Scanning Content: {content_path}") print(f"Scanning Content: {content_path}")
files = list(content_path.rglob("*.mkv")) + list(content_path.rglob("*.mp4")) files = list(content_path.rglob("*.mkv")) + list(content_path.rglob("*.mp4"))
files.sort(key=lambda x: x.stat().st_size, reverse=True) files.sort(key=lambda x: x.stat().st_size, reverse=True)
for f in files: tasks.append((f, "content")) for f in files:
if skipping:
if skip_until.lower() in str(f).lower():
skipping = False
print(f" Found '{skip_until}' - resuming from here")
else:
skipped_count += 1
continue
tasks.append((f, "content"))
if skipped_count > 0:
print(f" Skipped {skipped_count} files (--skip-until)")
if not tasks: if not tasks:
print("No files found.") print("No files found.")

View File

@@ -80,12 +80,22 @@ def fast_scan(path):
# --- UI State --- # --- UI State ---
class Dashboard: class Dashboard:
def __init__(self, num_workers): def __init__(self, num_workers, log_dir=None):
self.num_workers = num_workers self.num_workers = num_workers
self.worker_status = {i: {"file": "Idle", "action": "Waiting", "progress": 0, "speed": "", "color": "dim"} for i in range(num_workers)} self.worker_status = {i: {"file": "Idle", "action": "Waiting", "progress": 0, "speed": "", "color": "dim"} for i in range(num_workers)}
self.stats = {"processed": 0, "skipped": 0, "failed": 0, "rejected": 0, "savings_gb": 0.0} self.stats = {"processed": 0, "skipped": 0, "failed": 0, "rejected": 0, "savings_gb": 0.0}
self.recent_completed = [] self.recent_completed = []
self.lock = threading.Lock() self.lock = threading.Lock()
self.log_dir = log_dir
self.activity_log_file = None
# Open activity log file for streaming
if log_dir:
try:
log_path = Path(log_dir) / "activity.log"
self.activity_log_file = open(log_path, "a", encoding="utf-8")
except Exception as e:
print(f"[Warning] Could not open activity log: {e}")
def format_filename(self, filename): def format_filename(self, filename):
# Clean Sonarr format: {Series} - S{s}E{e} - {Title} {Quality} # Clean Sonarr format: {Series} - S{s}E{e} - {Title} {Quality}
@@ -136,11 +146,22 @@ class Dashboard:
def add_log(self, message): def add_log(self, message):
with self.lock: with self.lock:
ts = time.strftime("%H:%M:%S") ts = time.strftime("%Y-%m-%d %H:%M:%S")
self.recent_completed.insert(0, f"[{ts}] {message}") ts_short = time.strftime("%H:%M:%S")
log_entry = f"[{ts_short}] {message}"
self.recent_completed.insert(0, log_entry)
if len(self.recent_completed) > 12: if len(self.recent_completed) > 12:
self.recent_completed.pop() self.recent_completed.pop()
# Stream to activity log file
if self.activity_log_file:
try:
self.activity_log_file.write(f"[{ts}] {message}\n")
self.activity_log_file.flush()
except:
pass
def update_stats(self, key, val=1): def update_stats(self, key, val=1):
with self.lock: with self.lock:
if key == "savings_gb": self.stats[key] += val if key == "savings_gb": self.stats[key] += val
@@ -257,6 +278,9 @@ def main():
parser.add_argument("--monitor", action="store_true") parser.add_argument("--monitor", action="store_true")
parser.add_argument("--cpu-only", action="store_true", help="Force software encoding") parser.add_argument("--cpu-only", action="store_true", help="Force software encoding")
parser.add_argument("--temp-dir", help="Override local temp directory") parser.add_argument("--temp-dir", help="Override local temp directory")
parser.add_argument("--skip-until", help="Skip files until this substring is found in filename")
parser.add_argument("--av1-encoder", choices=["hw", "sw", "off"], default="hw", help="AV1 encoder: hw (hardware), sw (software), off (disable)")
parser.add_argument("--hevc-encoder", choices=["hw", "sw", "off"], default="hw", help="HEVC encoder: hw (hardware), sw (software), off (disable)")
args = parser.parse_args() args = parser.parse_args()
# Setup # Setup
@@ -267,7 +291,7 @@ def main():
encoders = common.detect_hardware_encoder(args) encoders = common.detect_hardware_encoder(args)
# UI # UI
dashboard = Dashboard(args.jobs) dashboard = Dashboard(args.jobs, log_dir=log_dir)
dashboard.add_log(f"Logs: {log_dir}") dashboard.add_log(f"Logs: {log_dir}")
# Work Queue # Work Queue
@@ -278,11 +302,30 @@ def main():
time.sleep(2) # Let UI start time.sleep(2) # Let UI start
dashboard.add_log("Starting background scan...") dashboard.add_log("Starting background scan...")
# Skip-until logic
skip_until = args.skip_until
skipping = bool(skip_until) # True until we find the match
skipped_count = 0
def should_skip(filepath):
nonlocal skipping, skipped_count
if not skipping:
return False
# Check if skip_until substring is in the filepath
if skip_until.lower() in str(filepath).lower():
skipping = False # Found it, stop skipping
dashboard.add_log(f"Found '{skip_until}' - resuming from here")
return False
skipped_count += 1
return True
# Load Cache first # Load Cache first
cached_files = load_cache() cached_files = load_cache()
if cached_files: if cached_files:
dashboard.add_log(f"Loaded {len(cached_files)} files from cache.") dashboard.add_log(f"Loaded {len(cached_files)} files from cache.")
for f in cached_files: for f in cached_files:
if should_skip(f):
continue
p = Path(f) p = Path(f)
cat = "tv_shows" if str(args.tv_dir) in str(p) else "content" cat = "tv_shows" if str(args.tv_dir) in str(p) else "content"
work_queue.put((p, cat)) work_queue.put((p, cat))
@@ -295,8 +338,13 @@ def main():
all_files.append(f) all_files.append(f)
# Only add if NOT in cache # Only add if NOT in cache
if str(f) not in cached_files: if str(f) not in cached_files:
if should_skip(f):
continue
work_queue.put((Path(f), cat)) work_queue.put((Path(f), cat))
if skipped_count > 0:
dashboard.add_log(f"Skipped {skipped_count} files (--skip-until)")
dashboard.add_log(f"Scan complete. Total: {len(all_files)}") dashboard.add_log(f"Scan complete. Total: {len(all_files)}")
save_cache(all_files) save_cache(all_files)

View File

@@ -25,7 +25,7 @@ DEFAULT_CONFIG = {
# --- Paths --- # --- Paths ---
def get_base_paths(args=None): def get_base_paths(args=None):
""" r"""
Determine root paths for locks and logs. Determine root paths for locks and logs.
Priority: Priority:
1. Shared Network Drive (parent of tv_dir) -> Z:\.vmaf_locks 1. Shared Network Drive (parent of tv_dir) -> Z:\.vmaf_locks
@@ -107,6 +107,62 @@ def log_event(log_dir, filename, data):
except Exception as e: except Exception as e:
print(f"[ERROR] Failed to write log: {e}") print(f"[ERROR] Failed to write log: {e}")
# --- Global Processed Log ---
# Prevents re-encoding files that were already processed in previous runs
_processed_cache = None


def load_processed_log(log_dir):
    """Load the set of already-processed file paths from ``processed.jsonl``.

    The log is a JSON-lines file inside *log_dir*; every line that parses to
    a JSON object with a string ``"file"`` value contributes that value to
    the result. Blank lines, malformed JSON, and lines that are not objects
    are ignored. A missing log file yields an empty set.

    The result is cached in the module-level ``_processed_cache``; later
    calls return that same set object without touching the disk.
    NOTE: because of the cache, *log_dir* is only honoured on the first call.

    Args:
        log_dir: Directory that contains (or will contain) ``processed.jsonl``.

    Returns:
        The set of processed file path strings (shared, mutable cache object).
    """
    global _processed_cache
    if _processed_cache is not None:
        return _processed_cache

    # Build into a local set and publish it only once loading is finished,
    # so a concurrent caller never observes a half-filled cache.
    processed = set()
    log_path = Path(log_dir) / "processed.jsonl"
    try:
        with open(log_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except ValueError:
                    # json.JSONDecodeError subclasses ValueError: skip
                    # corrupt or partially written lines, keep the rest.
                    continue
                if isinstance(entry, dict):
                    path = entry.get("file")
                    if isinstance(path, str):
                        processed.add(path)
    except FileNotFoundError:
        # No log yet: nothing has been processed in earlier runs.
        pass
    except (OSError, UnicodeError) as e:
        # Best effort: keep whatever was read before the failure.
        print(f"[Warning] Could not load processed log: {e}")

    # Do not clobber a cache another caller published while we were reading.
    if _processed_cache is None:
        _processed_cache = processed
    return _processed_cache
def is_already_processed(log_dir, filepath):
    """Return True if *filepath* is recorded in the processed-file log.

    The path is compared by its ``str()`` form against the set returned by
    ``load_processed_log(log_dir)``.
    """
    return str(filepath) in load_processed_log(log_dir)
def mark_processed(log_dir, filepath, codec, vmaf, savings):
    """Record *filepath* as processed so future runs can skip it.

    Appends one JSON line (file, codec, vmaf, savings, ISO timestamp) to
    ``processed.jsonl`` inside *log_dir*. When the in-memory processed cache
    has already been loaded, the path is added to it as well. Any failure is
    reported as a warning on stdout and otherwise ignored (best effort).
    """
    target = Path(log_dir) / "processed.jsonl"
    record = {
        "file": str(filepath),
        "codec": codec,
        "vmaf": vmaf,
        "savings": savings,
        "timestamp": datetime.now().isoformat()
    }
    try:
        serialized = json.dumps(record) + "\n"
        with open(target, "a", encoding="utf-8") as handle:
            handle.write(serialized)
        # Keep the in-memory view in sync, but only if it was ever loaded;
        # an unloaded cache will pick this entry up from disk later.
        cache = _processed_cache
        if cache is not None:
            cache.add(str(filepath))
    except Exception as e:
        print(f"[Warning] Could not write to processed log: {e}")
# --- Dependencies --- # --- Dependencies ---
def check_dependencies(required_tools=None): def check_dependencies(required_tools=None):
if required_tools is None: if required_tools is None:
@@ -197,65 +253,107 @@ def acquire_lock(lock_dir, filepath):
# --- Hardware Detection --- # --- Hardware Detection ---
def detect_hardware_encoder(args=None): def detect_hardware_encoder(args=None):
"""Detects available hardware encoders via ffmpeg (Cross-Platform)""" """
Detects available hardware encoders via ffmpeg (Cross-Platform).
# Check for forced CPU mode via args Supports --av1-encoder and --hevc-encoder flags:
if args and getattr(args, 'cpu_only', False): hw = prefer hardware encoder (error if unavailable)
# We still need to determine WHICH software encoder to use sw = force software encoder
# But we skip HW checks. off = disable this codec entirely
try: """
res = subprocess.run(["ffmpeg", "-hide_banner", "-encoders"], capture_output=True, text=True)
out = res.stdout # Get user preferences from args
if "libsvtav1" in out: return "libsvtav1", "libx265", "cpu" av1_pref = getattr(args, 'av1_encoder', 'hw') if args else 'hw'
if "libx265" in out: return None, "libx265", "cpu" hevc_pref = getattr(args, 'hevc_encoder', 'hw') if args else 'hw'
return None, None, "cpu" cpu_only = getattr(args, 'cpu_only', False) if args else False
except:
return None, None, "cpu" # If cpu_only, treat both as 'sw'
if cpu_only:
av1_pref = 'sw'
hevc_pref = 'sw'
try: try:
# Run ffmpeg -encoders # Run ffmpeg -encoders
res = subprocess.run(["ffmpeg", "-hide_banner", "-encoders"], capture_output=True, text=True) res = subprocess.run(["ffmpeg", "-hide_banner", "-encoders"], capture_output=True, text=True)
out = res.stdout out = res.stdout
av1_enc = None # Build available encoder maps
hevc_enc = None hw_av1_encoders = []
hw_hevc_encoders = []
hw_type = "cpu" hw_type = "cpu"
# 1. AMD (AMF) - Windows (Preferred) # 1. AMD (AMF) - Windows (Preferred)
if "av1_amf" in out: av1_enc = "av1_amf" if "av1_amf" in out: hw_av1_encoders.append(("av1_amf", "amf"))
if "hevc_amf" in out: hevc_enc = "hevc_amf" if "hevc_amf" in out: hw_hevc_encoders.append(("hevc_amf", "amf"))
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "amf"
# 2. NVIDIA (NVENC) - Windows/Linux # 2. NVIDIA (NVENC) - Windows/Linux
if "av1_nvenc" in out: av1_enc = "av1_nvenc" if "av1_nvenc" in out: hw_av1_encoders.append(("av1_nvenc", "nvenc"))
if "hevc_nvenc" in out: hevc_enc = "hevc_nvenc" if "hevc_nvenc" in out: hw_hevc_encoders.append(("hevc_nvenc", "nvenc"))
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "nvenc"
# 3. AMD (VAAPI) - Linux # 3. AMD (VAAPI) - Linux
# Often named hevc_vaapi, av1_vaapi if "av1_vaapi" in out: hw_av1_encoders.append(("av1_vaapi", "vaapi"))
if "av1_vaapi" in out: av1_enc = "av1_vaapi" if "hevc_vaapi" in out: hw_hevc_encoders.append(("hevc_vaapi", "vaapi"))
if "hevc_vaapi" in out: hevc_enc = "hevc_vaapi"
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "vaapi"
# 4. Intel (QSV) - Windows/Linux # 4. Intel (QSV) - Windows/Linux
if "av1_qsv" in out: av1_enc = "av1_qsv" if "av1_qsv" in out: hw_av1_encoders.append(("av1_qsv", "qsv"))
if "hevc_qsv" in out: hevc_enc = "hevc_qsv" if "hevc_qsv" in out: hw_hevc_encoders.append(("hevc_qsv", "qsv"))
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "qsv"
# 5. Apple Silicon (VideoToolbox) - macOS # 5. Apple Silicon (VideoToolbox) - macOS
if "av1_videotoolbox" in out: av1_enc = "av1_videotoolbox" if "av1_videotoolbox" in out: hw_av1_encoders.append(("av1_videotoolbox", "videotoolbox"))
if "hevc_videotoolbox" in out: hevc_enc = "hevc_videotoolbox" if "hevc_videotoolbox" in out: hw_hevc_encoders.append(("hevc_videotoolbox", "videotoolbox"))
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "videotoolbox"
# Fallback to Software if no HW found # Software encoders
# libsvtav1 / libx265 has_libsvtav1 = "libsvtav1" in out
if "libsvtav1" in out: av1_enc = "libsvtav1" has_libx265 = "libx265" in out
if "libx265" in out: hevc_enc = "libx265"
if av1_enc or hevc_enc: # Resolve AV1 encoder
return av1_enc, hevc_enc, "cpu" av1_enc = None
if av1_pref == 'off':
av1_enc = None
elif av1_pref == 'sw':
if has_libsvtav1:
av1_enc = "libsvtav1"
else:
print("[Warning] libsvtav1 not available, AV1 disabled")
elif av1_pref == 'hw':
if hw_av1_encoders:
av1_enc, hw_type = hw_av1_encoders[0] # First available (AMD priority)
elif has_libsvtav1:
av1_enc = "libsvtav1"
print("[Info] No AV1 HW encoder, using libsvtav1 (CPU)")
else:
print("[Warning] No AV1 encoder available")
return None, None, "none" # Resolve HEVC encoder
hevc_enc = None
if hevc_pref == 'off':
hevc_enc = None
elif hevc_pref == 'sw':
if has_libx265:
hevc_enc = "libx265"
else:
print("[Warning] libx265 not available, HEVC disabled")
elif hevc_pref == 'hw':
if hw_hevc_encoders:
hevc_enc, hevc_hw = hw_hevc_encoders[0]
# Update hw_type if we didn't get one from AV1
if hw_type == "cpu":
hw_type = hevc_hw
elif has_libx265:
hevc_enc = "libx265"
print("[Info] No HEVC HW encoder, using libx265 (CPU)")
else:
print("[Warning] No HEVC encoder available")
# Determine final hw_type label
if av1_enc and "lib" not in av1_enc:
pass # hw_type already set
elif hevc_enc and "lib" not in hevc_enc:
pass # hw_type already set from HEVC
else:
hw_type = "cpu"
return av1_enc, hevc_enc, hw_type
except Exception as e: except Exception as e:
print(f"[Warning] HW Detection failed: {e}") print(f"[Warning] HW Detection failed: {e}")