import fitz  # PyMuPDF

def split_pdf_fixed_pages(input_pdf, start_page=1, pages_per_chunk=25, output_prefix="output_part", endpage=None):
    """
    Split a PDF into consecutive parts of ``pages_per_chunk`` pages with PyMuPDF.

    Parts are written to ``{output_prefix}_{n}.pdf`` with ``n`` counting from 1;
    the final part may hold fewer pages. One progress line is printed per part.

    :param input_pdf: path of the input PDF
    :param start_page: first page to include (1-based); values below 1 are
        treated as 1
    :param pages_per_chunk: number of pages per output file; must be >= 1
    :param output_prefix: prefix of the output file names
    :param endpage: last page to include (1-based, inclusive); ``None`` or 0
        means "through the last page", larger values are clamped to the
        document length
    :raises ValueError: if ``pages_per_chunk`` is below 1, or ``start_page``
        lies beyond the last page to include
    """
    # Reject an unusable chunk size before opening anything: zero would fail
    # later inside range(), and a negative value would silently produce no
    # output at all.
    if pages_per_chunk < 1:
        raise ValueError("pages_per_chunk 必须为正整数")

    doc = fitz.open(input_pdf)
    try:
        total_pages = len(doc) if not endpage else min(len(doc), endpage)

        if start_page < 1:
            start_page = 1
        if start_page > total_pages:
            raise ValueError("start_page 超过 PDF 总页数")

        # PyMuPDF page numbers are 0-based.
        start_index = start_page - 1

        chunk_starts = range(start_index, total_pages, pages_per_chunk)
        for part_num, i in enumerate(chunk_starts, 1):
            # Exclusive 0-based end, which equals the 1-based last page number.
            end_page = min(i + pages_per_chunk, total_pages)
            new_doc = fitz.open()  # new empty PDF
            try:
                # insert_pdf takes an inclusive page range, so the whole
                # chunk is copied in a single call.
                new_doc.insert_pdf(doc, from_page=i, to_page=end_page - 1)
                output_filename = f"{output_prefix}_{part_num}.pdf"
                new_doc.save(output_filename)
            finally:
                new_doc.close()
            print(f"✅ 已保存 {output_filename}，包含第 {i+1}-{end_page} 页")
    finally:
        # Close the source document even when validation or saving fails.
        doc.close()
def split_pdf_variable_pages(input_pdf, start_page=1, pages_per_chunk=None, output_prefix="output_part"):
    """
    Split a PDF into consecutive parts whose sizes are given one by one.

    Part ``n`` (counting from 1) receives ``pages_per_chunk[n - 1]`` pages and
    is written to ``{output_prefix}_{n}.pdf``. Splitting stops early once the
    document runs out of pages; a part that reaches the end of the document
    may be shorter than requested. One progress line is printed per part.

    :param input_pdf: path of the input PDF
    :param start_page: first page to include (1-based); values below 1 are
        treated as 1
    :param pages_per_chunk: iterable of page counts, one per output file;
        every count must be >= 1. ``None`` (the default) means no parts.
    :param output_prefix: prefix of the output file names
    :raises ValueError: if any count is below 1, or ``start_page`` exceeds the
        number of pages in the PDF
    """
    chunk_sizes = [] if pages_per_chunk is None else list(pages_per_chunk)
    # A zero or negative count would yield an empty part and move the cursor
    # backwards, so reject it before opening anything.
    if any(count < 1 for count in chunk_sizes):
        raise ValueError("pages_per_chunk 中的每个值必须为正整数")

    doc = fitz.open(input_pdf)
    try:
        total_pages = len(doc)

        if start_page < 1:
            start_page = 1
        if start_page > total_pages:
            raise ValueError("start_page 超过 PDF 总页数")

        # PyMuPDF page numbers are 0-based.
        start_index = start_page - 1

        for part_num, count in enumerate(chunk_sizes, 1):
            if start_index >= total_pages:
                # No pages left: stop instead of trying to save an empty PDF.
                break
            # Exclusive 0-based end, which equals the 1-based last page number.
            end_page = min(start_index + count, total_pages)
            new_doc = fitz.open()  # new empty PDF
            try:
                # insert_pdf takes an inclusive page range, so the whole
                # chunk is copied in a single call.
                new_doc.insert_pdf(doc, from_page=start_index, to_page=end_page - 1)
                output_filename = f"{output_prefix}_{part_num}.pdf"
                new_doc.save(output_filename)
            finally:
                new_doc.close()
            # Report 1-based inclusive page numbers (start_index is 0-based).
            print(f"✅ 已保存 {output_filename}，包含第 {start_index + 1}-{end_page} 页")
            start_index = end_page
    finally:
        # Close the source document even when validation or saving fails.
        doc.close()

def rearrange_pdf(input_pdf, start_page, pagelist=None, output_filename="output.pdf"):
    """
    Build a new PDF from selected pages of ``input_pdf``, in the given order.

    Each entry ``i`` of ``pagelist`` is a 1-based page number counted from
    ``start_page``: entry 1 is page ``start_page`` of the input, entry 2 the
    page after it, and so on. Pages may repeat and appear in any order.

    :param input_pdf: path of the input PDF
    :param start_page: 1-based page of the input that ``pagelist`` entry 1
        refers to; it is used as given, without clamping
    :param pagelist: iterable of relative page numbers to copy, in output order
    :param output_filename: path of the PDF to write
    :raises ValueError: if ``pagelist`` is empty or ``None``, if ``start_page``
        exceeds the number of pages, or if any entry resolves to a page
        outside the document
    """
    pages = [] if pagelist is None else list(pagelist)
    # An empty selection would produce a PDF with no pages, which cannot be
    # saved; fail before opening anything.
    if not pages:
        raise ValueError("pagelist 不能为空")

    doc = fitz.open(input_pdf)
    try:
        total_pages = len(doc)
        if start_page > total_pages:
            raise ValueError("start_page 超过 PDF 总页数")

        # start_page and the entries are both 1-based, hence the -2 to reach
        # PyMuPDF's 0-based page index.
        indices = [start_page + i - 2 for i in pages]

        # insert_pdf uses negative page arguments to mean "use the default",
        # so an out-of-range index would silently copy the wrong pages
        # instead of failing.
        out_of_range = [i for i, idx in zip(pages, indices) if not 0 <= idx < total_pages]
        if out_of_range:
            raise ValueError(f"pagelist 中的页码超出 PDF 页数范围: {out_of_range}")

        new_doc = fitz.open()  # new empty PDF
        try:
            for idx in indices:
                new_doc.insert_pdf(doc, from_page=idx, to_page=idx)
            new_doc.save(output_filename)
        finally:
            new_doc.close()
    finally:
        # Close the source document even when validation or saving fails.
        doc.close()
def split_list(input_list, chunk_size):
    """Lazily yield consecutive slices of ``input_list``, each at most ``chunk_size`` long."""
    offsets = range(0, len(input_list), chunk_size)
    # Slicing already clamps at the end of the sequence, so the final slice
    # is simply shorter when the length is not a multiple of chunk_size.
    yield from (input_list[offset:offset + chunk_size] for offset in offsets)

def gen_pdfs_from_pagelist(input_file, start_page, page_list,pages_per_part, output_prefix, start_part=1):
    """
    Write the pages in ``page_list`` out as a series of PDFs.

    ``page_list`` is cut into groups of at most ``pages_per_part`` entries by
    ``split_list``; each group is passed to ``rearrange_pdf`` and saved as
    ``{output_prefix}{n}.pdf``, with ``n`` counting up from ``start_part``.
    One progress line is printed per file.
    """
    part_no = start_part
    for pages in split_list(page_list, pages_per_part):
        target = f"{output_prefix}{part_no}.pdf"
        rearrange_pdf(input_file, start_page, pages, target)
        print(f"✅ 已保存 {target}，包含第 {pages} 页")
        part_no += 1
# ===== Usage example =====
if __name__ == "__main__":
    # Split pages 12 through 2149 of the input PDF into 30-page parts named
    # "pdf/俄汉双解词典_<n>.pdf".
    # NOTE(review): presumably the "pdf/" directory must already exist — confirm.
    input_file = "俄汉双解词典.pdf"
    split_pdf_fixed_pages(
        input_file,
        start_page=12,
        pages_per_chunk=30,
        output_prefix="pdf/俄汉双解词典",
        endpage=2149,
    )
