| |
| """ |
| Upload KeyVID model to Hugging Face Hub (Optimized for Speed) |
| """ |
|
|
| from pathlib import Path |
| from huggingface_hub import HfApi, login, upload_folder |
| import os |
| from concurrent.futures import ThreadPoolExecutor, as_completed |
| from tqdm import tqdm |
| import time |
|
|
| |
# Hugging Face Hub repository that receives the upload.
MODEL_ID = "RyanWW/KeyVID"

# Local directory whose contents are uploaded.
KEYVID_PATH = "/dockerx/groups/KeyVID_hf_model"

# Thread count used for the per-file fallback upload.
MAX_WORKERS = 8
# 100 MiB, expressed in bytes.
CHUNK_SIZE = 100 * 1024 ** 2
|
|
def should_exclude_file(file_path, exclude_patterns):
    """Return True when ``file_path`` matches one of ``exclude_patterns``.

    Each pattern is compared against whole path components rather than as a
    substring of the full path, so ``".git"`` excludes ``.git/config`` but
    not ``.gitattributes``, and ``"*.log"`` excludes ``train.log`` but not
    ``model.logits.pt``.

    Pattern rules:
        * A pattern is split on ``/``; its segments must match consecutive
          path components, each via case-sensitive ``fnmatch`` globbing.
        * A trailing ``/`` restricts the match to directory components,
          i.e. everything except the final component of ``file_path``.
        * Empty patterns are ignored.

    Args:
        file_path: Path (``str`` or ``Path``) of the file, normally relative
            to the upload root.
        exclude_patterns: Iterable of pattern strings.

    Returns:
        bool: True if the file should be skipped, False otherwise.
    """
    from fnmatch import fnmatchcase

    components = Path(file_path).parts
    for pattern in exclude_patterns:
        segments = [segment for segment in pattern.split("/") if segment]
        if not segments:
            continue

        # "name/" may only match a directory, never the file name itself.
        candidates = components[:-1] if pattern.endswith("/") else components
        width = len(segments)

        # Slide the pattern over every run of consecutive components.
        for start in range(len(candidates) - width + 1):
            window = candidates[start:start + width]
            if all(fnmatchcase(part, seg) for part, seg in zip(window, segments)):
                return True
    return False
|
|
def get_files_to_upload(keyvid_dir, exclude_patterns):
    """Collect the files under ``keyvid_dir`` that are not excluded.

    Args:
        keyvid_dir: ``Path`` of the directory to walk recursively.
        exclude_patterns: Patterns forwarded to ``should_exclude_file``.

    Returns:
        tuple: ``(files, total_size)`` where ``files`` is a list of
        ``(relative_path, size_in_bytes)`` pairs and ``total_size`` is the
        sum of those sizes.
    """
    print("🔍 Scanning files...")

    selected = []
    for entry in tqdm(keyvid_dir.rglob("*"), desc="Scanning"):
        if not entry.is_file():
            continue

        # Patterns are evaluated against the path relative to the root.
        rel = entry.relative_to(keyvid_dir)
        if should_exclude_file(rel, exclude_patterns):
            continue

        selected.append((rel, entry.stat().st_size))

    return selected, sum(size for _, size in selected)
|
|
def format_size(size_bytes):
    """Render a byte count as a two-decimal string with a binary unit.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PB) is reached, e.g. ``1536`` -> ``"1.50 KB"``.
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    value = size_bytes
    index = 0
    # Stop one short of the end: "PB" is the catch-all for anything larger.
    while index < len(units) - 1 and not value < 1024.0:
        value /= 1024.0
        index += 1
    return f"{value:.2f} {units[index]}"
|
|
def upload_file_wrapper(args):
    """Upload one file to the Hub, reporting the outcome instead of raising.

    Takes a single tuple so it can be submitted directly to an executor.

    Args:
        args: ``(api, keyvid_dir, file_path, repo_id)`` where ``api`` exposes
            ``upload_file``, ``keyvid_dir`` is the local root directory,
            ``file_path`` is the file's path relative to that root, and
            ``repo_id`` is the target repository.

    Returns:
        tuple: ``(file_path, True, None)`` on success, or
        ``(file_path, False, error_message)`` if the upload raised.
    """
    from pathlib import PurePath

    api, keyvid_dir, file_path, repo_id = args
    try:
        relative = file_path if isinstance(file_path, PurePath) else PurePath(file_path)
        api.upload_file(
            path_or_fileobj=str(keyvid_dir / file_path),
            # Use forward slashes in the repo path whatever the local
            # path flavour is; str() would keep Windows backslashes.
            path_in_repo=relative.as_posix(),
            repo_id=repo_id,
            repo_type="model",
            commit_message="Upload KeyVID model files",
        )
    except Exception as exc:
        # Best effort: the caller collects failures and reports them later.
        return (file_path, False, str(exc))
    return (file_path, True, None)
|
|
def _hub_ignore_patterns(exclude_patterns):
    """Expand exclude patterns into fnmatch globs that match at any depth.

    fnmatch's ``*`` also matches ``/``, so ``name`` and ``*/name`` together
    cover an entry at the root and at any nesting level, while ``name/*`` and
    ``*/name/*`` cover everything inside a directory with that name. A
    pattern ending in ``/`` only produces the directory forms.
    """
    globs = []
    for pattern in exclude_patterns:
        name = pattern.rstrip("/")
        if not name:
            continue
        # Contents of a matching directory, at the root or nested.
        globs.extend((f"{name}/*", f"*/{name}/*"))
        if not pattern.endswith("/"):
            # The entry itself, at the root or nested.
            globs.extend((name, f"*/{name}"))
    return globs


def main():
    """Scan the KeyVID folder, ask for confirmation, and upload it to the Hub.

    Steps:
        1. Verify that usable Hugging Face credentials are available.
        2. Scan ``KEYVID_PATH`` and print file count and total size.
        3. Ask the user to confirm.
        4. Upload with ``upload_folder``; if that raises, fall back to
           uploading each scanned file through a thread pool.

    Returns early (without raising) when authentication fails, the directory
    is missing, nothing is left to upload, or the user declines.
    """
    print("🚀 Starting KeyVID upload to Hugging Face (Optimized)...")
    print(f"Repository: {MODEL_ID}")

    api = HfApi()
    try:
        # Creating HfApi does not validate credentials, so ask the Hub who
        # we are; this raises when no valid token is configured.
        api.whoami()
    except Exception:
        print("⚠️ Need to authenticate with Hugging Face")
        print("Please run: huggingface-cli login")
        print("Or set HF_TOKEN environment variable")
        return
    print("✅ Hugging Face authentication found")

    keyvid_dir = Path(KEYVID_PATH)

    # is_dir() also rejects a regular file sitting at KEYVID_PATH.
    if not keyvid_dir.is_dir():
        print(f"❌ Error: KeyVID directory not found at {KEYVID_PATH}")
        return

    print(f"\n📁 Directory: {keyvid_dir}")

    # Single source of truth for what must never be uploaded; used both for
    # the statistics below and for the actual upload.
    exclude_patterns = [
        "__pycache__",
        ".git",
        "*.pyc",
        ".DS_Store",
        "save_results/",
        "*.log",
        "*.tmp",
        "error.txt",
        ".bash_history",
        ".gitignore",
        "upload.py",
    ]

    files_to_upload, total_size = get_files_to_upload(keyvid_dir, exclude_patterns)

    print("\n📊 Statistics:")
    print(f" Files to upload: {len(files_to_upload)}")
    print(f" Total size: {format_size(total_size)}")

    if not files_to_upload:
        print("⚠️ No files to upload!")
        return

    response = input("\n❓ Proceed with upload? (y/n): ").strip().lower()
    if response != 'y':
        print("❌ Upload cancelled")
        return

    # Informational only: the split does not change how files are uploaded.
    large_threshold = 100 * 1024 * 1024
    large_count = sum(1 for _, size in files_to_upload if size > large_threshold)
    small_count = len(files_to_upload) - large_count

    print("\n📦 Upload strategy:")
    print(f" Large files (>100MB): {large_count}")
    print(f" Small files: {small_count}")
    print(f" Concurrent workers: {MAX_WORKERS}")

    print("\n⬆️ Uploading using optimized upload_folder...")
    try:
        start_time = time.time()

        # Derived from exclude_patterns so the upload skips exactly the
        # categories the user saw excluded in the statistics, including
        # entries that sit directly in the folder root.
        ignore_patterns = _hub_ignore_patterns(exclude_patterns)

        # NOTE(review): multi_commits / multi_commits_verbose look to be
        # deprecated in recent huggingface_hub releases - confirm against the
        # installed version. If they are rejected, the call raises and the
        # per-file fallback below takes over.
        upload_folder(
            folder_path=str(keyvid_dir),
            repo_id=MODEL_ID,
            repo_type="model",
            ignore_patterns=ignore_patterns,
            commit_message="Upload KeyVID model files",
            multi_commits=True,
            multi_commits_verbose=True,
        )

        elapsed_time = time.time() - start_time
        print("\n✅ Upload complete!")
        print(f"⏱️ Time taken: {elapsed_time/60:.2f} minutes")
        print(f"🔗 View model at: https://huggingface.co/{MODEL_ID}")

    except Exception as e:
        print(f"❌ Error with upload_folder: {e}")
        print("\n📝 Falling back to concurrent file upload...")

        start_time = time.time()
        failed_files = []

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = [
                executor.submit(
                    upload_file_wrapper, (api, keyvid_dir, file_path, MODEL_ID)
                )
                for file_path, _ in files_to_upload
            ]

            with tqdm(total=len(files_to_upload), desc="Uploading") as pbar:
                for future in as_completed(futures):
                    file_path, success, error = future.result()
                    if not success:
                        failed_files.append((file_path, error))
                    # The bar tracks attempted files, successful or not.
                    pbar.update(1)

        elapsed_time = time.time() - start_time

        if failed_files:
            shown = 10
            print(f"\n⚠️ {len(failed_files)} files failed to upload:")
            for file_path, error in failed_files[:shown]:
                print(f" - {file_path}: {error}")
            # Make it explicit when the list above was truncated.
            hidden = len(failed_files) - shown
            if hidden > 0:
                print(f" ... and {hidden} more")
        else:
            print("\n✅ All files uploaded successfully!")

        print(f"⏱️ Time taken: {elapsed_time/60:.2f} minutes")
        print(f"🔗 View model at: https://huggingface.co/{MODEL_ID}")


if __name__ == "__main__":
    main()
|
|
|
|