chore: 清理构建产物并更新.gitignore

删除旧的Nuitka构建目录和生成的二进制文件
将build_nuitka/添加到.gitignore中避免误提交
This commit is contained in:
2026-02-10 16:42:29 +08:00
parent 6630d94333
commit 92619edcdb
988 changed files with 824 additions and 120062 deletions
Binary file not shown.
Binary file not shown.
+56
View File
@@ -0,0 +1,56 @@
import requests
import time
def download_file(url, filepath, referer=None, log_callback=None):
    """
    [Data Layer] Download ``url`` to ``filepath`` with a streamed request.

    The request is attempted up to three times with a one-second pause
    between attempts. Data is streamed into ``<filepath>.part`` and only moved
    onto ``filepath`` after the whole body has been written, so a failed or
    interrupted transfer never leaves a truncated file at ``filepath``.

    Args:
        url: Address of the resource to download.
        filepath: Destination path of the finished file.
        referer: Optional ``Referer`` header value. When omitted, a Bilibili
            referer is used for bilibili.com / hdslb.com / bilivideo.com URLs
            and a Douyin referer for every other URL.
        log_callback: Optional callable that receives one message string.

    Returns:
        True when the file was completely written, False otherwise. Errors are
        caught and reported through ``log_callback`` instead of being raised.
    """
    import os  # local import: this module's top-level imports do not include os

    max_attempts = 3
    part_path = f"{filepath}.part"
    try:
        # Headers may need adjusting per platform.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        }
        if referer:
            # An explicitly supplied referer always wins.
            headers["Referer"] = referer
        elif (
            "bilibili.com" in url
            or "hdslb.com" in url
            or "bilivideo.com" in url
        ):
            # Legacy fallback: guess the referer from the URL host.
            headers["Referer"] = "https://www.bilibili.com/"
        else:
            headers["Referer"] = "https://www.douyin.com/"

        for attempt in range(max_attempts):
            is_last_attempt = attempt == max_attempts - 1
            try:
                # The context manager releases the streamed connection even
                # when the body is not (fully) consumed.
                with requests.get(
                    url, headers=headers, stream=True, timeout=20
                ) as response:
                    if response.status_code == 200:
                        with open(part_path, "wb") as f:
                            for chunk in response.iter_content(chunk_size=1024 * 1024):
                                f.write(chunk)
                        # Publish the file only once it is complete.
                        os.replace(part_path, filepath)
                        return True
                    # Only the final failure is reported to keep the log short.
                    if is_last_attempt and log_callback:
                        log_callback(
                            f"下载请求失败: Status {response.status_code} | URL: {url[:30]}..."
                        )
            except requests.exceptions.RequestException as e:
                if is_last_attempt and log_callback:
                    log_callback(f"网络请求异常: {e}")
            if not is_last_attempt:
                # Brief pause before retrying; pointless after the last try.
                time.sleep(1)
        return False
    except Exception as e:
        if log_callback:
            log_callback(f"下载出错: {e}")
        return False
    finally:
        # Drop any leftover partial data (it is already gone after a
        # successful os.replace, hence the tolerated error).
        try:
            os.remove(part_path)
        except OSError:
            pass
Binary file not shown.
Binary file not shown.
+245
View File
@@ -0,0 +1,245 @@
import os
import time
import re
import json
import requests
import concurrent.futures
from datetime import datetime
from tkinter import messagebox
from DrissionPage import ChromiumPage, ChromiumOptions
from src.downloader import download_file
def run_bilibili_task(
    target_url, target_count, save_root, browser_path, log_callback, finish_callback
):
    """
    [Control Layer] Bilibili end-to-end workflow.

    1. Launch the browser.
    2. Listen for the video-list API responses while scrolling / paging.
    3. Download the collected videos in parallel through a thread pool.

    Args:
        target_url: Page to open in the browser.
        target_count: Maximum number of works to collect and download.
        save_root: Directory the downloaded files are written to.
        browser_path: Browser executable handed to ChromiumOptions.
        log_callback: Callable that receives one progress message string.
        finish_callback: Optional callable invoked as
            ``finish_callback(title, message)`` after all downloads finished.
            It is not invoked when the task aborts with an error.
    """
    dp = None
    try:
        log_callback(f"正在启动 Edge 浏览器 ({browser_path})...")
        co = ChromiumOptions()
        co.set_paths(browser_path=browser_path)
        dp = ChromiumPage(addr_or_opts=co)

        # Capture responses of the user's video-list endpoint.
        dp.listen.start("space/wbi/arc/search")
        log_callback(f"正在访问: {target_url}")
        dp.get(target_url)

        # Try to move to the /video tab of a user space page.
        if "space.bilibili.com" in target_url and "/video" not in dp.url:
            video_url = target_url.rstrip("/") + "/video"
            log_callback(f"尝试跳转到视频页: {video_url}")
            dp.get(video_url)

        collected_works = []
        seen_bvids = set()  # O(1) duplicate detection instead of rescanning the list
        log_callback("正在扫描作品列表 (请不要关闭弹出的浏览器)...")
        no_new_data_count = 0
        while len(collected_works) < target_count:
            dp.scroll.to_bottom()
            res = dp.listen.wait(timeout=2)
            found_new = False
            if res:
                try:
                    data = res.response.body
                    # Expected payload shape: data['data']['list']['vlist']
                    if (
                        data
                        and isinstance(data, dict)
                        and "data" in data
                        and "list" in data["data"]
                    ):
                        for video in data["data"]["list"]["vlist"] or []:
                            bvid = video["bvid"]
                            if bvid not in seen_bvids:
                                seen_bvids.add(bvid)
                                collected_works.append(video)
                                found_new = True
                except Exception:
                    # Best effort: a malformed packet is ignored and scanning
                    # continues with the next one.
                    pass
            log_callback(f"已获取作品信息: {len(collected_works)}/{target_count}")
            if len(collected_works) >= target_count:
                break
            if not found_new:
                no_new_data_count += 1
                time.sleep(1)
                # Try to advance to the next page.
                # NOTE(review): the idle counter is reset after every click, so
                # the loop only stops via the idle limit once no "下一页"
                # element is found any more - confirm that holds on the last page.
                try:
                    next_btn = dp.ele("text:下一页", timeout=1)
                    if next_btn:
                        next_btn.click()
                        no_new_data_count = 0
                        time.sleep(2)
                except Exception:
                    # A missing / unclickable button is not fatal.
                    pass
            else:
                no_new_data_count = 0
            if no_new_data_count > 10:
                log_callback("未检测到新数据,可能已到底部。")
                break

        log_callback(f"扫描完成,共获取 {len(collected_works)} 个作品。")
        dp.close()
        dp = None  # already closed; keeps the finally clause from closing twice

        works_to_process = collected_works[:target_count]
        log_callback("开始下载 (多线程并行)...")

        # Derive "<date>_<sanitised title>" as the file name base of each work.
        download_tasks = []
        for index, work in enumerate(works_to_process):
            ts = work.get("created", time.time())
            date_str = datetime.fromtimestamp(ts).strftime("%Y_%m_%d")
            title = work.get("title", "无标题")
            # Replace characters that are not allowed in file names.
            title = re.sub(r'[\\/:*?"<>|]', "_", title)
            download_tasks.append((work, index, f"{date_str}_{title}"))

        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [
                executor.submit(
                    process_bilibili_work,
                    work,
                    index,
                    len(works_to_process),
                    save_root,
                    file_name_base,
                    log_callback,
                )
                for work, index, file_name_base in download_tasks
            ]
            concurrent.futures.wait(futures)

        log_callback("=" * 30)
        log_callback("全部任务结束!")
        if finish_callback:
            finish_callback("完成", "全部下载任务已结束!")
    except Exception as e:
        log_callback(f"发生严重错误: {e}")
    finally:
        # Make sure the browser page is closed when the task aborted early.
        # Restoring the UI state is left to the caller.
        if dp is not None:
            try:
                dp.close()
            except Exception:
                pass
def process_bilibili_work(
    work, index, total_count, save_root, file_name_base, log_callback
):
    """
    [Data Layer] Download one Bilibili video (and its audio track, if any).

    Args:
        work: Mapping describing the video; its "bvid" entry is required.
        index: Zero-based position of the work, used for progress messages.
        total_count: Total number of works, used for progress messages.
        save_root: Directory the files are written to.
        file_name_base: File name without extension.
        log_callback: Callable that receives one progress message string.

    Every outcome, including unexpected exceptions, is reported through
    ``log_callback``; nothing is raised to the caller.
    """
    prefix = f"[{index + 1}/{total_count}] {file_name_base}"
    bilibili_referer = "https://www.bilibili.com/"
    try:
        bvid = work["bvid"]
        log_callback(f"{prefix} | 分析中...")
        video_url, audio_url = get_bilibili_play_url(bvid)
        if not video_url:
            log_callback(f"{prefix} -> 无法获取下载地址")
            return

        # Download the video stream (a Referer is passed explicitly).
        video_path = os.path.join(save_root, f"{file_name_base}.mp4")
        if download_file(
            video_url,
            video_path,
            referer=bilibili_referer,
            log_callback=log_callback,
        ):
            log_callback(f"{prefix} -> 视频下载完成")
        else:
            log_callback(f"{prefix} -> 视频下载失败")

        # The audio track is only present for split (dash) streams.
        if not audio_url:
            return
        audio_path = os.path.join(save_root, f"{file_name_base}_audio.m4a")
        if os.path.exists(audio_path):
            log_callback(f"{prefix} -> 音频已存在")
        elif download_file(
            audio_url,
            audio_path,
            referer=bilibili_referer,
            log_callback=log_callback,
        ):
            log_callback(f"{prefix} -> 音频下载完成")
        else:
            # Report the failure instead of dropping it silently.
            log_callback(f"{prefix} -> 音频下载失败")
    except Exception as e:
        log_callback(f"{prefix} -> 处理出错: {e}")
def get_bilibili_play_url(bvid):
    """
    Resolve the media URLs of a Bilibili video.

    The video page is fetched and the JSON assigned to
    ``window.__playinfo__`` is parsed.

    Args:
        bvid: Video id inserted into the page URL.

    Returns:
        A ``(video_url, audio_url)`` tuple. For a "durl" payload the audio
        entry is None; for a "dash" payload each entry is None when the
        matching stream list is missing or empty. ``(None, None)`` is returned
        when the page cannot be fetched or parsed.
    """
    page_url = f"https://www.bilibili.com/video/{bvid}"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://www.bilibili.com/",
    }
    try:
        resp = requests.get(page_url, headers=request_headers, timeout=10)
        if resp.status_code != 200:
            return None, None

        found = re.search(r"window\.__playinfo__=(.*?)</script>", resp.text)
        if not found:
            return None, None

        play_data = json.loads(found.group(1)).get("data", {})

        # Single-file payload: the first entry carries the complete media URL.
        if play_data.get("durl"):
            return play_data["durl"][0]["url"], None

        # Split payload: video and audio come as separate stream lists.
        if "dash" in play_data:
            dash = play_data["dash"]
            video_streams = dash.get("video")
            audio_streams = dash.get("audio")
            return (
                video_streams[0]["baseUrl"] if video_streams else None,
                audio_streams[0]["baseUrl"] if audio_streams else None,
            )
    except Exception:
        # Best effort: any failure is reported as "no URL found".
        pass
    return None, None
+188
View File
@@ -0,0 +1,188 @@
import os
import time
import concurrent.futures
from datetime import datetime
from DrissionPage import ChromiumPage, ChromiumOptions
from src.downloader import download_file
def run_douyin_task(target_url, target_count, save_root, browser_path, log_callback, finish_callback):
    """
    [Control Layer] Douyin end-to-end workflow.

    1. Launch the browser.
    2. Listen for the post-list API responses while scrolling.
    3. Download the collected works in parallel through a thread pool.

    Args:
        target_url: Page to open in the browser.
        target_count: Maximum number of works to collect and download.
        save_root: Directory the downloaded files are written to.
        browser_path: Browser executable handed to ChromiumOptions.
        log_callback: Callable that receives one progress message string.
        finish_callback: Optional callable invoked as
            ``finish_callback(title, message)`` after all downloads finished.
            It is not invoked when the task aborts with an error.
    """
    dp = None
    try:
        log_callback(f"正在启动 Edge 浏览器 ({browser_path})...")
        co = ChromiumOptions()
        co.set_paths(browser_path=browser_path)
        # Launch the browser.
        dp = ChromiumPage(addr_or_opts=co)

        # Capture responses of the post-list endpoint.
        dp.listen.start("aweme/v1/web/aweme/post")
        log_callback(f"正在访问: {target_url}")
        dp.get(target_url)

        collected_works = []
        seen_ids = set()  # O(1) duplicate detection instead of rescanning the list
        log_callback("正在扫描作品列表 (请不要关闭弹出的浏览器)...")
        no_new_data_count = 0
        while len(collected_works) < target_count:
            dp.scroll.to_bottom()
            # Wait for the next captured packet.
            res = dp.listen.wait(timeout=2)
            found_new = False
            if res:
                try:
                    data = res.response.body
                    if data and "aweme_list" in data:
                        for aweme in data["aweme_list"] or []:
                            aweme_id = aweme["aweme_id"]
                            if aweme_id not in seen_ids:
                                seen_ids.add(aweme_id)
                                collected_works.append(aweme)
                                found_new = True
                except Exception:
                    # Best effort: a malformed packet is ignored and scanning
                    # continues with the next one.
                    pass
            log_callback(f"已获取作品信息: {len(collected_works)}/{target_count}")
            if len(collected_works) >= target_count:
                break
            if not found_new:
                no_new_data_count += 1
                time.sleep(1)
            else:
                no_new_data_count = 0
            if no_new_data_count > 8:
                log_callback("未检测到新数据,可能已到底部。")
                break

        log_callback(f"扫描完成,共获取 {len(collected_works)} 个作品。")
        dp.close()
        dp = None  # already closed; keeps the finally clause from closing twice

        works_to_process = collected_works[:target_count]
        # Oldest first.
        works_to_process.sort(key=lambda x: x["create_time"])
        log_callback("开始下载 (多线程并行)...")

        # File names are the creation date; further works of the same day get
        # a "(2)", "(3)", ... suffix.
        date_counter = {}
        download_tasks = []
        for index, work in enumerate(works_to_process):
            date_str = datetime.fromtimestamp(work["create_time"]).strftime("%Y_%m_%d")
            date_counter[date_str] = date_counter.get(date_str, 0) + 1
            count_idx = date_counter[date_str]
            file_name_base = date_str if count_idx == 1 else f"{date_str}({count_idx})"
            download_tasks.append((work, index, file_name_base))

        # max_workers=5: up to five works are downloaded at the same time.
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = [
                executor.submit(
                    process_douyin_work,
                    work,
                    index,
                    len(works_to_process),
                    save_root,
                    file_name_base,
                    log_callback,
                )
                for work, index, file_name_base in download_tasks
            ]
            # Block until every download has finished.
            concurrent.futures.wait(futures)

        log_callback("=" * 30)
        log_callback("全部任务结束!")
        if finish_callback:
            finish_callback("完成", "全部下载任务已结束!")
    except Exception as e:
        log_callback(f"发生严重错误: {e}")
    finally:
        # Make sure the browser page is closed when the task aborted early.
        if dp is not None:
            try:
                dp.close()
            except Exception:
                pass
def process_douyin_work(work, index, total_count, save_root, file_name_base, log_callback):
    """
    [Data Layer] Worker that downloads a single Douyin work.

    A work with a non-empty "images" entry is treated as an image post and
    stored as ``<save_root>/<file_name_base>/<n>.png``; anything else is
    treated as a video and stored as ``<save_root>/<file_name_base>.mp4``.
    Files that already exist are not downloaded again.

    Args:
        work: Mapping describing the work as delivered by the list API.
        index: Zero-based position of the work, used for progress messages.
        total_count: Total number of works, used for progress messages.
        save_root: Directory the files are written to.
        file_name_base: File (or folder) name without extension.
        log_callback: Callable that receives one progress message string.

    Every outcome, including unexpected exceptions, is reported through
    ``log_callback``; nothing is raised to the caller.
    """
    prefix = f"[{index + 1}/{total_count}] {file_name_base}"
    try:
        is_video = not ("images" in work and work["images"])
        log_callback(
            f"{prefix} | {'视频' if is_video else '图文'} | 下载中..."
        )

        if is_video:
            video_url = work["video"]["play_addr"]["url_list"][0]
            file_path = os.path.join(save_root, f"{file_name_base}.mp4")
            if os.path.exists(file_path):
                log_callback(f"{prefix} -> 文件已存在,跳过")
            elif download_file(video_url, file_path, log_callback=log_callback):
                log_callback(f"{prefix} -> 下载完成")
            else:
                log_callback(f"{prefix} -> 下载失败")
            return

        # Image post: one sub-folder per work, images numbered from 1.
        img_folder = os.path.join(save_root, file_name_base)
        os.makedirs(img_folder, exist_ok=True)
        images = work["images"]
        failed_count = 0
        for idx, img_obj in enumerate(images):
            img_url = img_obj["url_list"][0]
            img_path = os.path.join(img_folder, f"{idx + 1}.png")
            if os.path.exists(img_path):
                continue
            if not download_file(img_url, img_path, log_callback=log_callback):
                failed_count += 1

        # Only claim success when every image is actually on disk.
        if failed_count:
            log_callback(
                f"{prefix} -> 图文部分下载失败 ({failed_count}/{len(images)})"
            )
        else:
            log_callback(f"{prefix} -> 图文下载完成")
    except Exception as e:
        log_callback(f"{prefix} -> 处理出错: {e}")
Binary file not shown.
+260
View File
@@ -0,0 +1,260 @@
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext
import threading
import os
from src.utils import find_edge_path
from src.tasks.bilibili import run_bilibili_task
from src.tasks.douyin import run_douyin_task
class DouyinDownloaderApp:
    """Tkinter front end for the Douyin / Bilibili batch downloader."""

    def __init__(self, root):
        """
        [UI Layer] Initialise the main window.

        Sets the window title, size and icon, creates the Tk variables bound
        to the input widgets and builds the layout.

        Args:
            root: The Tk root window the application lives in.
        """
        self.root = root
        self.root.title("抖音/B站批量下载工具 (GUI版)")
        self.root.geometry("600x650")

        # Window icon: try several locations so that both running from source
        # and running a packaged build work. The entry script and the "ico"
        # folder are assumed to live in the project root.
        try:
            base_dir = os.getcwd()
            icon_candidates = [
                os.path.join(base_dir, "ico", "video_downloader.ico"),
                os.path.join(base_dir, "video_downloader.ico"),
                # Relative to this module: two directory levels up.
                os.path.abspath(
                    os.path.join(
                        os.path.dirname(__file__),
                        "..",
                        "..",
                        "ico",
                        "video_downloader.ico",
                    )
                ),
            ]
            for icon_path in icon_candidates:
                if os.path.exists(icon_path):
                    self.root.iconbitmap(icon_path)
                    break
        except Exception:
            # The icon is purely cosmetic; ignore any failure to set it.
            pass

        # Variables bound to the input widgets.
        self.url_var = tk.StringVar()
        self.count_var = tk.StringVar(value="10")
        self.save_path_var = tk.StringVar()

        # Browser path: auto-detected, with a hard-coded fallback.
        default_browser = find_edge_path()
        if not default_browser:
            default_browser = (
                r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"
            )
        self.browser_path_var = tk.StringVar(value=default_browser)

        # Selected platform ("douyin" or "bilibili").
        self.platform_var = tk.StringVar(value="douyin")
        self.is_running = False
        self.create_widgets()

    def create_widgets(self):
        """
        [UI Layer] Build the widgets.

        The pack layout manager is used to stack the inputs and buttons from
        top to bottom.
        """
        # 1. Profile URL
        tk.Label(self.root, text="1. 作者主页链接:").pack(
            anchor="w", padx=10, pady=(10, 0)
        )
        entry_url = tk.Entry(self.root, textvariable=self.url_var, width=60)
        entry_url.pack(padx=10, pady=5, fill="x")

        # 2. Number of works to fetch
        tk.Label(self.root, text="2. 爬取视频个数:").pack(
            anchor="w", padx=10, pady=(10, 0)
        )
        entry_count = tk.Entry(self.root, textvariable=self.count_var, width=60)
        entry_count.pack(padx=10, pady=5, fill="x")

        # 3. Save folder
        tk.Label(self.root, text="3. 保存路径:").pack(anchor="w", padx=10, pady=(10, 0))
        frame_path = tk.Frame(self.root)
        frame_path.pack(padx=10, pady=5, fill="x")
        # Fix note: state="readonly" is used here instead of readOnly=True.
        entry_path = tk.Entry(
            frame_path, textvariable=self.save_path_var, state="readonly"
        )
        entry_path.pack(side="left", fill="x", expand=True)
        btn_browse = tk.Button(
            frame_path, text="选择文件夹", command=self.select_folder
        )
        btn_browse.pack(side="right", padx=(5, 0))

        # 4. Browser executable
        tk.Label(self.root, text="4. 浏览器路径 (Edge):").pack(
            anchor="w", padx=10, pady=(10, 0)
        )
        frame_browser = tk.Frame(self.root)
        frame_browser.pack(padx=10, pady=5, fill="x")
        entry_browser = tk.Entry(frame_browser, textvariable=self.browser_path_var)
        entry_browser.pack(side="left", fill="x", expand=True)
        btn_browse_browser = tk.Button(
            frame_browser, text="选择文件", command=self.select_browser
        )
        btn_browse_browser.pack(side="right", padx=(5, 0))

        # 5. Platform selection
        tk.Label(self.root, text="5. 下载平台:").pack(anchor="w", padx=10, pady=(10, 0))
        frame_platform = tk.Frame(self.root)
        frame_platform.pack(padx=10, pady=5, fill="x")
        rb_douyin = tk.Radiobutton(
            frame_platform, text="抖音", variable=self.platform_var, value="douyin"
        )
        rb_douyin.pack(side="left", padx=10)
        rb_bilibili = tk.Radiobutton(
            frame_platform, text="B站", variable=self.platform_var, value="bilibili"
        )
        rb_bilibili.pack(side="left", padx=10)

        # 6. Start button
        self.btn_start = tk.Button(
            self.root,
            text="开始下载",
            command=self.start_thread,
            bg="#4CAF50",
            fg="white",
            font=("Arial", 12, "bold"),
        )
        self.btn_start.pack(pady=15, fill="x", padx=50)

        # 7. Log output area
        tk.Label(self.root, text="运行日志:").pack(anchor="w", padx=10)
        self.log_text = scrolledtext.ScrolledText(
            self.root, height=15, state="disabled"
        )
        self.log_text.pack(padx=10, pady=5, fill="both", expand=True)

    def log(self, message):
        """
        [UI Layer] Append a message to the log from any thread.

        Worker threads must not update widgets directly, so the write is
        scheduled onto the Tk main loop through ``root.after``.
        """
        self.root.after(0, self._log_impl, message)

    def _log_impl(self, message):
        """Write ``message`` plus a newline to the log widget and scroll down."""
        self.log_text.config(state="normal")
        self.log_text.insert(tk.END, message + "\n")
        self.log_text.see(tk.END)  # keep the newest line visible
        self.log_text.config(state="disabled")

    def select_folder(self):
        """Ask for a directory and store it as the save path."""
        folder_selected = filedialog.askdirectory()
        if folder_selected:
            self.save_path_var.set(folder_selected)

    def select_browser(self):
        """Ask for the browser executable and store its path."""
        file_selected = filedialog.askopenfilename(
            title="选择 Edge 浏览器可执行文件",
            filetypes=[("Executable Files", "*.exe"), ("All Files", "*.*")],
        )
        if file_selected:
            self.browser_path_var.set(file_selected)

    def start_thread(self):
        """
        [Control Layer] Validate the inputs and start the worker thread.

        The task runs in a separate daemon thread so that the window stays
        responsive. Invalid input is reported in a message box and nothing is
        started.
        """
        if self.is_running:
            messagebox.showwarning("提示", "任务正在进行中,请稍候...")
            return

        # Validate the inputs.
        url = self.url_var.get().strip()
        count_str = self.count_var.get().strip()
        save_path = self.save_path_var.get().strip()
        browser_path = self.browser_path_var.get().strip()
        platform = self.platform_var.get()

        if not url:
            messagebox.showerror("错误", "请输入主页链接")
            return
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() cannot convert, which would raise ValueError.
        if not count_str.isdecimal() or int(count_str) <= 0:
            messagebox.showerror("错误", "请输入正确的数量")
            return
        if not save_path:
            messagebox.showerror("错误", "请选择保存路径")
            return
        # The browser executable has to exist.
        if not browser_path or not os.path.exists(browser_path):
            messagebox.showerror(
                "错误",
                f"指定的浏览器路径不存在:\n{browser_path}\n请手动选择正确的 msedge.exe 路径。",
            )
            return

        self.is_running = True
        self.btn_start.config(state="disabled", text="正在运行...")
        # Clear the log of the previous run.
        self.log_text.config(state="normal")
        self.log_text.delete("1.0", tk.END)
        self.log_text.config(state="disabled")

        # Start the worker thread.
        thread = threading.Thread(
            target=self.run_task,
            args=(url, int(count_str), save_path, browser_path, platform),
            daemon=True,
        )
        thread.start()

    def run_task(self, target_url, target_count, save_root, browser_path, platform):
        """
        [Control Layer] Dispatch to the task of the selected platform.

        Runs in the worker thread. Whatever the outcome, the running flag is
        cleared and re-enabling the start button is scheduled on the main loop.
        """
        try:
            if platform == "bilibili":
                run_bilibili_task(
                    target_url,
                    target_count,
                    save_root,
                    browser_path,
                    log_callback=self.log,
                    finish_callback=self.finish_task,
                )
            else:
                run_douyin_task(
                    target_url,
                    target_count,
                    save_root,
                    browser_path,
                    log_callback=self.log,
                    finish_callback=self.finish_task,
                )
        finally:
            self.is_running = False
            self.root.after(
                0, lambda: self.btn_start.config(state="normal", text="开始下载")
            )

    def finish_task(self, title, message):
        """Schedule an info dialog with ``title`` / ``message`` on the main loop."""
        self.root.after(0, lambda: messagebox.showinfo(title, message))
+16
View File
@@ -0,0 +1,16 @@
import os
def find_edge_path():
    """
    [Utils] Locate the Edge executable so the user does not have to.

    Returns:
        The first candidate path that exists on disk, or None when none of
        the candidate locations exists.
    """
    candidates = (
        r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
        os.path.expanduser(r"~\AppData\Local\Microsoft\Edge\Application\msedge.exe"),
    )
    # Candidates are probed in order; the first hit wins.
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        None,
    )