made it smarter

This commit is contained in:
bnair
2026-01-03 14:20:32 +01:00
parent 4729c75e41
commit 51fc7e12bc
3 changed files with 228 additions and 47 deletions

View File

@@ -223,6 +223,11 @@ def process_file(filepath, log_category, lock_dir, log_dir, encoders, worker_id=
if shutdown_requested: return
# 0. Check if already processed in a previous run
if common.is_already_processed(log_dir, filepath):
update("Already Processed (Skipping)", "dim")
return
# 1. Lock Check (Shared Storage)
lock_file = common.acquire_lock(lock_dir, filepath)
if not lock_file:
@@ -402,6 +407,10 @@ def process_file(filepath, log_category, lock_dir, log_dir, encoders, worker_id=
"original_metadata": info,
"encoded_metadata": final_info_verified or final_info
})
# Mark as processed to prevent re-encoding in future runs
common.mark_processed(log_dir, filepath, chosen_codec, vmaf_score, final_savings)
update("Done", "green")
if status_cb: status_cb(worker_id, filename, f"STATS:SAVED:{saved_bytes}", "green")
else:
@@ -430,6 +439,8 @@ def main():
parser.add_argument("--skip-until", help="Skip all files alphabetically until this filename substring is found")
parser.add_argument("--cpu-only", action="store_true", help="Force software encoding (CPU only)")
parser.add_argument("--temp-dir", help="Override local temp directory")
parser.add_argument("--av1-encoder", choices=["hw", "sw", "off"], default="hw", help="AV1 encoder: hw (hardware), sw (software), off (disable)")
parser.add_argument("--hevc-encoder", choices=["hw", "sw", "off"], default="hw", help="HEVC encoder: hw (hardware), sw (software), off (disable)")
args = parser.parse_args()
if args.debug:
@@ -464,19 +475,43 @@ def main():
# 3. Scan & Queue
tasks = []
# Skip-until filtering
skip_until = args.skip_until
skipping = bool(skip_until)
skipped_count = 0
tv_path = Path(args.tv_dir)
if tv_path.exists():
print(f"Scanning TV: {tv_path}")
files = list(tv_path.rglob("*.mkv")) + list(tv_path.rglob("*.mp4"))
files.sort(key=lambda x: x.stat().st_size, reverse=True)
for f in files: tasks.append((f, "tv_shows"))
for f in files:
if skipping:
if skip_until.lower() in str(f).lower():
skipping = False
print(f" Found '{skip_until}' - resuming from here")
else:
skipped_count += 1
continue
tasks.append((f, "tv_shows"))
content_path = Path(args.content_dir)
if content_path.exists():
print(f"Scanning Content: {content_path}")
files = list(content_path.rglob("*.mkv")) + list(content_path.rglob("*.mp4"))
files.sort(key=lambda x: x.stat().st_size, reverse=True)
for f in files: tasks.append((f, "content"))
for f in files:
if skipping:
if skip_until.lower() in str(f).lower():
skipping = False
print(f" Found '{skip_until}' - resuming from here")
else:
skipped_count += 1
continue
tasks.append((f, "content"))
if skipped_count > 0:
print(f" Skipped {skipped_count} files (--skip-until)")
if not tasks:
print("No files found.")

View File

@@ -80,12 +80,22 @@ def fast_scan(path):
# --- UI State ---
class Dashboard:
def __init__(self, num_workers):
def __init__(self, num_workers, log_dir=None):
self.num_workers = num_workers
self.worker_status = {i: {"file": "Idle", "action": "Waiting", "progress": 0, "speed": "", "color": "dim"} for i in range(num_workers)}
self.stats = {"processed": 0, "skipped": 0, "failed": 0, "rejected": 0, "savings_gb": 0.0}
self.recent_completed = []
self.lock = threading.Lock()
self.log_dir = log_dir
self.activity_log_file = None
# Open activity log file for streaming
if log_dir:
try:
log_path = Path(log_dir) / "activity.log"
self.activity_log_file = open(log_path, "a", encoding="utf-8")
except Exception as e:
print(f"[Warning] Could not open activity log: {e}")
def format_filename(self, filename):
# Clean Sonarr format: {Series} - S{s}E{e} - {Title} {Quality}
@@ -136,10 +146,21 @@ class Dashboard:
def add_log(self, message):
with self.lock:
ts = time.strftime("%H:%M:%S")
self.recent_completed.insert(0, f"[{ts}] {message}")
ts = time.strftime("%Y-%m-%d %H:%M:%S")
ts_short = time.strftime("%H:%M:%S")
log_entry = f"[{ts_short}] {message}"
self.recent_completed.insert(0, log_entry)
if len(self.recent_completed) > 12:
self.recent_completed.pop()
# Stream to activity log file
if self.activity_log_file:
try:
self.activity_log_file.write(f"[{ts}] {message}\n")
self.activity_log_file.flush()
except:
pass
def update_stats(self, key, val=1):
with self.lock:
@@ -257,6 +278,9 @@ def main():
parser.add_argument("--monitor", action="store_true")
parser.add_argument("--cpu-only", action="store_true", help="Force software encoding")
parser.add_argument("--temp-dir", help="Override local temp directory")
parser.add_argument("--skip-until", help="Skip files until this substring is found in filename")
parser.add_argument("--av1-encoder", choices=["hw", "sw", "off"], default="hw", help="AV1 encoder: hw (hardware), sw (software), off (disable)")
parser.add_argument("--hevc-encoder", choices=["hw", "sw", "off"], default="hw", help="HEVC encoder: hw (hardware), sw (software), off (disable)")
args = parser.parse_args()
# Setup
@@ -267,7 +291,7 @@ def main():
encoders = common.detect_hardware_encoder(args)
# UI
dashboard = Dashboard(args.jobs)
dashboard = Dashboard(args.jobs, log_dir=log_dir)
dashboard.add_log(f"Logs: {log_dir}")
# Work Queue
@@ -278,11 +302,30 @@ def main():
time.sleep(2) # Let UI start
dashboard.add_log("Starting background scan...")
# Skip-until logic
skip_until = args.skip_until
skipping = bool(skip_until) # True until we find the match
skipped_count = 0
def should_skip(filepath):
nonlocal skipping, skipped_count
if not skipping:
return False
# Check if skip_until substring is in the filepath
if skip_until.lower() in str(filepath).lower():
skipping = False # Found it, stop skipping
dashboard.add_log(f"Found '{skip_until}' - resuming from here")
return False
skipped_count += 1
return True
# Load Cache first
cached_files = load_cache()
if cached_files:
dashboard.add_log(f"Loaded {len(cached_files)} files from cache.")
for f in cached_files:
if should_skip(f):
continue
p = Path(f)
cat = "tv_shows" if str(args.tv_dir) in str(p) else "content"
work_queue.put((p, cat))
@@ -295,7 +338,12 @@ def main():
all_files.append(f)
# Only add if NOT in cache
if str(f) not in cached_files:
if should_skip(f):
continue
work_queue.put((Path(f), cat))
if skipped_count > 0:
dashboard.add_log(f"Skipped {skipped_count} files (--skip-until)")
dashboard.add_log(f"Scan complete. Total: {len(all_files)}")
save_cache(all_files)

View File

@@ -25,7 +25,7 @@ DEFAULT_CONFIG = {
# --- Paths ---
def get_base_paths(args=None):
"""
r"""
Determine root paths for locks and logs.
Priority:
1. Shared Network Drive (parent of tv_dir) -> Z:\.vmaf_locks
@@ -107,6 +107,62 @@ def log_event(log_dir, filename, data):
except Exception as e:
print(f"[ERROR] Failed to write log: {e}")
# --- Global Processed Log ---
# Prevents re-encoding files that were already processed in previous runs
_processed_cache = None
def load_processed_log(log_dir):
"""Load set of already-processed file paths from processed.jsonl"""
global _processed_cache
if _processed_cache is not None:
return _processed_cache
_processed_cache = set()
log_path = Path(log_dir) / "processed.jsonl"
if log_path.exists():
try:
with open(log_path, "r", encoding="utf-8") as f:
for line in f:
try:
entry = json.loads(line.strip())
if "file" in entry:
_processed_cache.add(entry["file"])
except:
continue
except Exception as e:
print(f"[Warning] Could not load processed log: {e}")
return _processed_cache
def is_already_processed(log_dir, filepath):
"""Check if file was already processed in a previous run"""
processed = load_processed_log(log_dir)
return str(filepath) in processed
def mark_processed(log_dir, filepath, codec, vmaf, savings):
"""Mark a file as processed (prevents re-encoding in future runs)"""
global _processed_cache
log_path = Path(log_dir) / "processed.jsonl"
entry = {
"file": str(filepath),
"codec": codec,
"vmaf": vmaf,
"savings": savings,
"timestamp": datetime.now().isoformat()
}
try:
with open(log_path, "a", encoding="utf-8") as f:
f.write(json.dumps(entry) + "\n")
# Update cache
if _processed_cache is not None:
_processed_cache.add(str(filepath))
except Exception as e:
print(f"[Warning] Could not write to processed log: {e}")
# --- Dependencies ---
def check_dependencies(required_tools=None):
if required_tools is None:
@@ -197,65 +253,107 @@ def acquire_lock(lock_dir, filepath):
# --- Hardware Detection ---
def detect_hardware_encoder(args=None):
"""Detects available hardware encoders via ffmpeg (Cross-Platform)"""
"""
Detects available hardware encoders via ffmpeg (Cross-Platform).
Supports --av1-encoder and --hevc-encoder flags:
hw = prefer hardware encoder (error if unavailable)
sw = force software encoder
off = disable this codec entirely
"""
# Get user preferences from args
av1_pref = getattr(args, 'av1_encoder', 'hw') if args else 'hw'
hevc_pref = getattr(args, 'hevc_encoder', 'hw') if args else 'hw'
cpu_only = getattr(args, 'cpu_only', False) if args else False
# If cpu_only, treat both as 'sw'
if cpu_only:
av1_pref = 'sw'
hevc_pref = 'sw'
# Check for forced CPU mode via args
if args and getattr(args, 'cpu_only', False):
# We still need to determine WHICH software encoder to use
# But we skip HW checks.
try:
res = subprocess.run(["ffmpeg", "-hide_banner", "-encoders"], capture_output=True, text=True)
out = res.stdout
if "libsvtav1" in out: return "libsvtav1", "libx265", "cpu"
if "libx265" in out: return None, "libx265", "cpu"
return None, None, "cpu"
except:
return None, None, "cpu"
try:
# Run ffmpeg -encoders
res = subprocess.run(["ffmpeg", "-hide_banner", "-encoders"], capture_output=True, text=True)
out = res.stdout
av1_enc = None
hevc_enc = None
# Build available encoder maps
hw_av1_encoders = []
hw_hevc_encoders = []
hw_type = "cpu"
# 1. AMD (AMF) - Windows (Preferred)
if "av1_amf" in out: av1_enc = "av1_amf"
if "hevc_amf" in out: hevc_enc = "hevc_amf"
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "amf"
if "av1_amf" in out: hw_av1_encoders.append(("av1_amf", "amf"))
if "hevc_amf" in out: hw_hevc_encoders.append(("hevc_amf", "amf"))
# 2. NVIDIA (NVENC) - Windows/Linux
if "av1_nvenc" in out: av1_enc = "av1_nvenc"
if "hevc_nvenc" in out: hevc_enc = "hevc_nvenc"
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "nvenc"
if "av1_nvenc" in out: hw_av1_encoders.append(("av1_nvenc", "nvenc"))
if "hevc_nvenc" in out: hw_hevc_encoders.append(("hevc_nvenc", "nvenc"))
# 3. AMD (VAAPI) - Linux
# Often named hevc_vaapi, av1_vaapi
if "av1_vaapi" in out: av1_enc = "av1_vaapi"
if "hevc_vaapi" in out: hevc_enc = "hevc_vaapi"
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "vaapi"
if "av1_vaapi" in out: hw_av1_encoders.append(("av1_vaapi", "vaapi"))
if "hevc_vaapi" in out: hw_hevc_encoders.append(("hevc_vaapi", "vaapi"))
# 4. Intel (QSV) - Windows/Linux
if "av1_qsv" in out: av1_enc = "av1_qsv"
if "hevc_qsv" in out: hevc_enc = "hevc_qsv"
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "qsv"
if "av1_qsv" in out: hw_av1_encoders.append(("av1_qsv", "qsv"))
if "hevc_qsv" in out: hw_hevc_encoders.append(("hevc_qsv", "qsv"))
# 5. Apple Silicon (VideoToolbox) - macOS
if "av1_videotoolbox" in out: av1_enc = "av1_videotoolbox"
if "hevc_videotoolbox" in out: hevc_enc = "hevc_videotoolbox"
if av1_enc or hevc_enc: return av1_enc, hevc_enc, "videotoolbox"
if "av1_videotoolbox" in out: hw_av1_encoders.append(("av1_videotoolbox", "videotoolbox"))
if "hevc_videotoolbox" in out: hw_hevc_encoders.append(("hevc_videotoolbox", "videotoolbox"))
# Fallback to Software if no HW found
# libsvtav1 / libx265
if "libsvtav1" in out: av1_enc = "libsvtav1"
if "libx265" in out: hevc_enc = "libx265"
# Software encoders
has_libsvtav1 = "libsvtav1" in out
has_libx265 = "libx265" in out
if av1_enc or hevc_enc:
return av1_enc, hevc_enc, "cpu"
# Resolve AV1 encoder
av1_enc = None
if av1_pref == 'off':
av1_enc = None
elif av1_pref == 'sw':
if has_libsvtav1:
av1_enc = "libsvtav1"
else:
print("[Warning] libsvtav1 not available, AV1 disabled")
elif av1_pref == 'hw':
if hw_av1_encoders:
av1_enc, hw_type = hw_av1_encoders[0] # First available (AMD priority)
elif has_libsvtav1:
av1_enc = "libsvtav1"
print("[Info] No AV1 HW encoder, using libsvtav1 (CPU)")
else:
print("[Warning] No AV1 encoder available")
return None, None, "none"
# Resolve HEVC encoder
hevc_enc = None
if hevc_pref == 'off':
hevc_enc = None
elif hevc_pref == 'sw':
if has_libx265:
hevc_enc = "libx265"
else:
print("[Warning] libx265 not available, HEVC disabled")
elif hevc_pref == 'hw':
if hw_hevc_encoders:
hevc_enc, hevc_hw = hw_hevc_encoders[0]
# Update hw_type if we didn't get one from AV1
if hw_type == "cpu":
hw_type = hevc_hw
elif has_libx265:
hevc_enc = "libx265"
print("[Info] No HEVC HW encoder, using libx265 (CPU)")
else:
print("[Warning] No HEVC encoder available")
# Determine final hw_type label
if av1_enc and "lib" not in av1_enc:
pass # hw_type already set
elif hevc_enc and "lib" not in hevc_enc:
pass # hw_type already set from HEVC
else:
hw_type = "cpu"
return av1_enc, hevc_enc, hw_type
except Exception as e:
print(f"[Warning] HW Detection failed: {e}")