# # 程序信息
# # MyNote修改版
# # 描述：此为个人修改版，原作者为格蕾特（グレーテ） 参见 https://forum.freemdict.com/t/topic/34974
# # 目前版本：RX1.0
# # 历史版本：
# # 改版的更新日志：
# # RX1.0  2026年 4月 5日    修复bug：paste的safe模式下可能出现的格式问题
# # R2.4   2026年 4月 5日    增加功能：为粘贴文本增加了纯文本模式（plain）。
# # R2.3   2026年 4月 4日    修改功能：优化了UI界面。
# # R2.2   2026年 4月 4日    修改功能：优化了代码结构和错误处理机制；优化了大文本编辑的性能。
# # R2.1   2026年 4月 3日    修改功能：给编辑界面增加了滚动块。
# #                         修改功能：当笔记内容超过一定长度时，自动启用分片块编辑模式，避免一次性加载过大文本导致界面卡顿；在分片块编辑模式下，提供块导航和保存功能，提升大笔记的编辑体验。
# # R2.0   2026年 4月 3日    修改功能：完善了复制和粘贴的支持，修复了之前版本中粘贴 HTML 时可能出现的格式问题，增加了粘贴模式选择（safe/raw），并保存用户的选择以便下次使用；修复了之前版本中在某些情况下可能出现的笔记内容丢失问题；优化了代码结构和错误处理机制。
# #                          修改功能：可调整编辑界面的窗口大小和位置，并保存用户的调整以便下次使用。
# #                          增加功能：将导入的图片文件保存在与分组同名的文件夹下，避免图片文件过多导致管理混乱。   
# # R1.0   2024年12月18日    增加功能：添加记忆上一次分组选项的功能
# # 原版的更新日志：
# # 3.0    2025年 1月 9日    修改功能：允许同一条目在多个分组下创建笔记
# #                          增加功能：允许用户往笔记内添加图片
# #                                    允许用户通过选择文本进行拖动；绑定 Ctrl+B/I/U 快捷键
# # 2.0    2024年12月31日    增加功能：笔记本分组
# # 1.0    2024年12月18日    增加功能：添加笔记、记录查询信息

import sys
import os
import re
import tkinter as tk
from tkinter import messagebox, filedialog
import win32clipboard
import win32con
from bs4 import BeautifulSoup, Comment
import logging
import json

# Set the stdout encoding to UTF-8.
sys.stdout.reconfigure(encoding='utf-8')

# ===== Centralized configuration =====
CONFIG = {
    'ENABLE_NOTE': True,  # gates the note lookup, save_content() and edit_interface() blocks
    'ENABLE_QUERY_COUNT': False,  # gates the query-count bookkeeping
    'NOTE_FOLDER': r'# 填写笔记文件夹路径 #',  # placeholder: fill in the note folder path
    'LOG_FOLDER': r'# 填写日志文件夹路径 #',  # placeholder: fill in the log folder path
    'DEFAULT_NOTE_FILE': 'GoldenDict-MyNote.txt',  # note file used for the '[Default]' group
    'TEXT_SIZE_THRESHOLD': 30000,  # character count above which undo is disabled and chunk mode starts
    'CHUNK_LOAD_STEP': 16000,  # characters per chunk / per "load more" step in the editor
}

# Compatibility aliases: module-level names for the CONFIG entries.
ENABLE_NOTE = CONFIG['ENABLE_NOTE']
ENABLE_QUERY_COUNT = CONFIG['ENABLE_QUERY_COUNT']
NOTE_FOLDER = CONFIG['NOTE_FOLDER']
LOG_FOLDER = CONFIG['LOG_FOLDER']
DEFAULT_NOTE_FILE = CONFIG['DEFAULT_NOTE_FILE']
TEXT_SIZE_THRESHOLD = CONFIG['TEXT_SIZE_THRESHOLD']
# The three editor thresholds below all share the same value.
MAX_TEXT_UNDO_SIZE = TEXT_SIZE_THRESHOLD
TEXT_LAZY_LOAD_SIZE = TEXT_SIZE_THRESHOLD
CHUNK_MODE_THRESHOLD = TEXT_SIZE_THRESHOLD
CHUNK_LOAD_STEP = CONFIG['CHUNK_LOAD_STEP']

# Small settings files (last group, paste mode, window geometry, query counts)
# live in a "config" sub-folder of the note folder.
# NOTE(review): os.makedirs() also creates missing parent directories, so this
# call creates NOTE_FOLDER itself when it does not exist; the later
# "Note folder not found" check therefore looks unreachable in that case — confirm
# whether that is intended.
CONFIG_FOLDER = os.path.join(NOTE_FOLDER, 'config')
os.makedirs(CONFIG_FOLDER, exist_ok=True)
QUERY_COUNT_FILE = os.path.join(CONFIG_FOLDER, 'GoldenDict-Query.txt')
# Folder used by safe_write_file() when the primary location cannot be written:
# %APPDATA%\GoldenDict-MyNote, or the home directory when APPDATA is not set.
FALLBACK_FOLDER = os.path.join(os.getenv('APPDATA', os.path.expanduser('~')), 'GoldenDict-MyNote')
os.makedirs(FALLBACK_FOLDER, exist_ok=True)

# 安全写入通用方法
def safe_write_file(path, data, mode='w', encoding='utf-8'):
    """Write ``data`` to ``path``, retrying in FALLBACK_FOLDER on I/O failure.

    Args:
        path: Destination file.
        data: Text to write.
        mode: Mode passed to ``open`` (text modes only, since an encoding is given).
        encoding: Text encoding of the file.

    Returns:
        The path that was actually written: ``path`` on success, the path of the
        same-named file inside FALLBACK_FOLDER when only the retry succeeded,
        or ``None`` when both attempts failed.
    """
    def _dump(target):
        # Single write attempt shared by the primary and the fallback location.
        with open(target, mode, encoding=encoding) as handle:
            handle.write(data)
        return target

    try:
        return _dump(path)
    except OSError as primary_error:  # PermissionError is a subclass of OSError
        logging.error(f"Permission denied or I/O error writing {path}: {primary_error}")

    fallback_path = os.path.join(FALLBACK_FOLDER, os.path.basename(path))
    try:
        _dump(fallback_path)
    except Exception as fallback_error:
        logging.error(f"Fallback save failed for {fallback_path}: {fallback_error}")
        return None
    logging.info(f"Saved to fallback path: {fallback_path}")
    return fallback_path

# 安全读取通用方法
def safe_read_file(path, encoding='utf-8'):
    """Return the text content of ``path``, or ``None`` when it cannot be read.

    Only ``OSError`` (missing file, permission problem, ...) is handled and
    logged; decoding errors are not caught here.
    """
    content = None
    try:
        with open(path, 'r', encoding=encoding) as source:
            content = source.read()
    except OSError as read_error:  # IOError is an alias of OSError
        logging.error(f"Error reading {path}: {read_error}")
    return content

# Make sure the log folder exists; file logging is disabled when it cannot be created.
if not os.path.exists(LOG_FOLDER):
    try:
        os.makedirs(LOG_FOLDER)
    except OSError:
        LOG_FOLDER = None

# Log to a file inside LOG_FOLDER, or nowhere at all when no usable folder exists.
log_file = None
if LOG_FOLDER:
    log_file = os.path.join(LOG_FOLDER, 'GoldenDict-MyNote.log')

try:
    _log_handler = logging.FileHandler(log_file, encoding='utf-8') if log_file else logging.NullHandler()
except OSError:
    # The folder exists but the log file itself cannot be opened (read-only
    # location, locked file, LOG_FOLDER is not a directory, ...). Logging is
    # best-effort, so keep running without a log file instead of crashing.
    log_file = None
    _log_handler = logging.NullHandler()

logging.basicConfig(
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[_log_handler]
)

# === 新增代码开始 ===
LAST_GROUP_CONFIG = os.path.join(CONFIG_FOLDER, 'last_group_config.txt')


def get_last_group():
    """Return the group name stored by the previous session.

    Yields ``'[Default]'`` when the settings file is missing, unreadable or empty.
    """
    stored = safe_read_file(LAST_GROUP_CONFIG)
    return stored.strip() if stored else '[Default]'


def save_last_group(group_name):
    """Remember ``group_name`` as the most recently used group."""
    safe_write_file(LAST_GROUP_CONFIG, group_name)

LAST_PASTE_MODE_CONFIG = os.path.join(CONFIG_FOLDER, 'last_paste_mode_config.txt')


def get_last_paste_mode():
    """Return the paste mode stored by the previous session.

    Only ``'safe'``, ``'raw'`` and ``'plain'`` are accepted; anything else
    (including a missing or empty settings file) yields ``'safe'``.
    """
    stored = safe_read_file(LAST_PASTE_MODE_CONFIG)
    if not stored:
        return 'safe'
    candidate = stored.strip()
    if candidate in ('safe', 'raw', 'plain'):
        return candidate
    return 'safe'


def save_last_paste_mode(mode):
    """Remember ``mode`` as the most recently used paste mode."""
    safe_write_file(LAST_PASTE_MODE_CONFIG, mode)

WINDOW_CONFIG = os.path.join(CONFIG_FOLDER, 'window_config.txt')

# Size and position used when nothing usable has been stored yet.
DEFAULT_WINDOW_GEOMETRY = "800x600+100+100"

# Tk geometry string: WIDTHxHEIGHT optionally followed by the X and Y offsets,
# each introduced by "+" or "-" (negative screen coordinates appear as "+-8").
_WINDOW_GEOMETRY_RE = re.compile(r'^\d+x\d+(?:[+-]-?\d+[+-]-?\d+)?$')


def get_window_geometry():
    """Return the window geometry stored by the previous session.

    The stored value is handed straight to ``root.geometry()``, which raises
    ``TclError`` on a malformed string, so the content is validated first.
    Falls back to ``DEFAULT_WINDOW_GEOMETRY`` when the settings file is
    missing, unreadable, empty or does not hold a geometry string.
    """
    content = safe_read_file(WINDOW_CONFIG)
    if content:
        geometry = content.strip()
        if _WINDOW_GEOMETRY_RE.match(geometry):
            return geometry
        logging.error(f"Ignoring invalid window geometry in {WINDOW_CONFIG}: {geometry!r}")
    return DEFAULT_WINDOW_GEOMETRY


def save_window_geometry(geometry):
    """Persist the current window geometry string for the next session."""
    safe_write_file(WINDOW_CONFIG, geometry)
# === 新增代码结束 ===

# Read the query word passed on the command line.
if len(sys.argv) < 2:
    print("<html><body>Error: No word provided.</body></html>")
    sys.exit(1)

word = sys.argv[1].strip()
if word == '':
    print("<html><body>Error: Empty word provided.</body></html>")
    sys.exit(1)

# Edit mode is requested by a "?edit=true" marker anywhere in the query.
is_edit_mode = word.find('?edit=true') != -1

# The marker itself is only removed when it is the suffix of the query.
if word.endswith("?edit=true"):
    word = word[:len(word) - len("?edit=true")]

# "word=group" selects a note group; any other number of "=" separated parts
# leaves the group empty. The headword is always the first part.
is_multiple_group = word.split('=')
word = is_multiple_group[0].strip()
if len(is_multiple_group) == 2:
    selected_group = is_multiple_group[1].strip()
else:
    selected_group = ''

# Abort when the note folder is missing.
if not os.path.exists(NOTE_FOLDER):
    print(f"<html><body>Error: Note folder not found: {NOTE_FOLDER}</body></html>")
    sys.exit(1)


if ENABLE_QUERY_COUNT:
    from datetime import datetime, timedelta
    # Local timezone of the user, resolved with the standard library only.
    local_tz = datetime.now().astimezone().tzinfo


# Record the query count.
if ENABLE_QUERY_COUNT:
    query_data = {}

    # Each record is "word<TAB>count<TAB>first_time<TAB>last_time" (ISO timestamps).
    if os.path.exists(QUERY_COUNT_FILE):
        with open(QUERY_COUNT_FILE, 'r', encoding='utf-8') as file:
            for line in file:
                parts = line.strip().split('\t')
                if len(parts) != 4:
                    continue
                q_word, count, first_time, last_time = parts
                try:
                    # Validate now so that a single damaged record cannot crash
                    # every later lookup when it is parsed again below.
                    count_value = int(count)
                    datetime.fromisoformat(first_time)
                    datetime.fromisoformat(last_time)
                except ValueError:
                    logging.error(f"Skipping malformed query record: {line!r}")
                    continue
                query_data[q_word] = {
                    'count': count_value,
                    'first_time': first_time,
                    'last_time': last_time
                }

    now = datetime.now(local_tz).replace(microsecond=0)

    if word in query_data:
        prev_last_time = query_data[word]['last_time']
        # Time elapsed since the previous query of this word.
        last_time_obj = datetime.fromisoformat(prev_last_time).astimezone(local_tz)
        time_diff = now - last_time_obj

        # Lookups repeated within 10 minutes are not counted as a new query.
        if time_diff >= timedelta(minutes=10):
            query_data[word]['count'] += 1
            query_data[word]['last_time'] = now.isoformat()  # stored as ISO string
    else:
        prev_last_time = "N/A"
        query_data[word] = {
            'count': 1,
            'first_time': now.isoformat(),  # stored as ISO string
            'last_time': now.isoformat()  # stored as ISO string
        }

    # Rewrite the whole file with the updated records.
    with open(QUERY_COUNT_FILE, 'w', encoding='utf-8') as file:
        for q_word, data in query_data.items():
            file.write(f"{q_word}\t{data['count']}\t{data['first_time']}\t{data['last_time']}\n")

# Build the query statistics shown to the user.
if ENABLE_QUERY_COUNT and word in query_data:
    data = query_data[word]
    first_time = datetime.fromisoformat(data['first_time']).astimezone(local_tz)
    last_time = datetime.fromisoformat(data['last_time']).astimezone(local_tz)
    now = datetime.now(local_tz)

    def format_time_difference(time):
        """Describe ``time`` relative to ``now``; absolute timestamp once older than a day."""
        diff = now - time
        seconds = diff.total_seconds()
        if seconds < 86400:
            if seconds < 60:
                return f"{int(seconds)} seconds ago"
            elif seconds < 3600:
                return f"{seconds / 60:.1f} minutes ago"
            else:
                return f"{seconds / 3600:.1f} hours ago"
        else:
            return time.strftime('%Y-%m-%dT%H:%M:%S%z')

    first_time_display = format_time_difference(first_time)

    # "Last query" means the query before the current one, hence prev_last_time.
    if prev_last_time != "N/A":
        second_to_last_time = datetime.fromisoformat(prev_last_time).astimezone(local_tz)
        last_time_display = format_time_difference(second_to_last_time)
    else:
        last_time_display = "N/A"

    query_output = (
        f"<b>{word}</b> queried <b>{data['count']}</b> times, "
        f"first query time is <b>{first_time_display}</b>, "
        f"last query time is: <b>{last_time_display}</b>."
    )
else:
    query_output = ""
    

# ===== 笔记内容管理（优化版）=====
note_contents_cache = {}


def get_note_content(file_name):
    """Return the text of a note file, reading it from NOTE_FOLDER at most once.

    A file that cannot be read (or is empty) is cached as the empty string.
    """
    try:
        return note_contents_cache[file_name]
    except KeyError:
        pass
    loaded = safe_read_file(os.path.join(NOTE_FOLDER, file_name))
    note_contents_cache[file_name] = loaded if loaded else ''
    return note_contents_cache[file_name]


def set_note_content(file_name, content):
    """Replace the cached text of ``file_name`` with ``content``."""
    note_contents_cache.update({file_name: content})

def build_note_pattern(word):
    """Build the regex source matching the stored entry of ``word``.

    An entry is the headword line, a ``<font size=5>`` title repeating the
    headword, the note body inside ``<p class="senselevel-1">`` (capture
    group 1) and the closing ``</>`` line; newlines between the parts are
    optional.
    """
    escaped = re.escape(word)
    return "".join((
        escaped,
        r'\n*<font size=5>', escaped, r'</font>',
        r'\n*<p class=\"senselevel-1\">(.*?)</p>',
        r'\n*</>',
    ))


def find_note_in_content(content, word):
    """Return the stripped note body of ``word`` found in ``content``, or ``None``."""
    found = re.compile(build_note_pattern(word), re.DOTALL).search(content)
    if found is None:
        return None
    return found.group(1).strip()

# 遍历文件夹中的笔记文件
if ENABLE_NOTE:
    # Every "GoldenDict-MyNote*.txt" file in the note folder is one note group.
    try:
        note_files = [f for f in os.listdir(NOTE_FOLDER) if f.startswith('GoldenDict-MyNote') and f.endswith('.txt')]
    except OSError as e:
        logging.error(f"Error listing note folder: {e}")
        note_files = []

    def _group_name_from_file(file_name):
        """Map a note file name to its group label ('[Default]' for the default file)."""
        if file_name == DEFAULT_NOTE_FILE:
            return "[Default]"
        return file_name.replace('GoldenDict-MyNote-', '').replace('.txt', '')

    if selected_group:
        # A group was given in the query: look only into that group's file.
        selected_note_file = f"GoldenDict-MyNote-{selected_group}.txt" if selected_group != '[Default]' else DEFAULT_NOTE_FILE
        current_note_content = get_note_content(selected_note_file)
        note_text = find_note_in_content(current_note_content, word)
        result = note_text if note_text else f"No note found for the word: {word}"
        match = bool(note_text)
        # edit_interface() reads result_flag to decide whether `result` is preloaded
        # into the editor; it was never assigned on this branch, which raised a
        # NameError when a note of an explicitly selected group was edited.
        result_flag = match
    else:
        # No group given: search the word in every group file.
        selected_note_file = DEFAULT_NOTE_FILE
        result_flag = False
        len_results = 0
        all_group_names = []
        results = []

        for nf in note_files:
            content = get_note_content(nf)
            note_text = find_note_in_content(content, word)
            if note_text:
                results.append((note_text, nf))

        len_results = len(results)
        if len_results == 0:
            result_flag = False
            result = f"No note found for the word: {word}"
        elif len_results == 1:
            # Exactly one group holds the word: that group's file becomes the edit target.
            result, selected_note_file = results[0]
            group_name = _group_name_from_file(selected_note_file)
            all_group_names.append(group_name)
            result_flag = True
        else:
            # Several groups hold the word: show all notes, each under its group heading.
            formatted_results = ""
            for content, file in results:
                group_name = _group_name_from_file(file)
                formatted_results += f"<b>Group: {group_name}</b><br/>{content}<br/><hr/>"
                all_group_names.append(group_name)
            result = formatted_results
            result_flag = True
        match = result_flag




# ===== HTML处理公共函数 =====
def strip_html_newlines(text):
    """Trim trailing ``<br>`` tags and whitespace and collapse blank-line runs.

    Steps, in order: drop every ``<br>``/``<br/>`` tag (with surrounding
    whitespace) at the very end of the text, reduce each run of blank lines to
    a single empty line, then strip trailing whitespace.
    """
    trailing_breaks = re.compile(r'(?:\s*<br\s*/?\>\s*)+$', re.IGNORECASE)
    blank_line_runs = re.compile(r'\n\s*\n+')

    without_breaks = trailing_breaks.sub('', text)
    collapsed = blank_line_runs.sub('\n\n', without_breaks)
    return collapsed.rstrip()

# ===== Precompiled regular expressions (performance optimization) =====
# HTML comments, including multi-line ones.
RE_HTML_COMMENTS = re.compile(r'<!--.*?-->', re.DOTALL)
# Unwanted elements together with their content (opening tag up to the matching closing tag).
RE_BAD_TAGS = re.compile(r'<(meta|style|script|head|title|link|iframe|svg|noscript)[^>]*>.*?</\1>', re.IGNORECASE | re.DOTALL)
# Opening tags of the same unwanted elements on their own.
RE_BAD_TAGS_SELF = re.compile(r'<(meta|style|script|head|title|link|iframe|svg|noscript)[^>]*>', re.IGNORECASE)
# One or more <br>/<br/> tags (with surrounding whitespace) at the end of the text.
RE_BR_TAGS = re.compile(r'(?:\s*<br\s*/?\>\s*)+$', re.IGNORECASE)
# A newline followed by optional whitespace and at least one more newline.
RE_MULTIPLE_NEWLINES = re.compile(r'\n\s*\n+')
# A complete <html>...</html> document.
RE_HTML_INTEGRITY = re.compile(r'<html.*?>.*?</html>', re.DOTALL | re.IGNORECASE)
# "StartFragment:<digits>" / "EndFragment:<digits>" header fields; group 1 is the number.
RE_START_FRAGMENT = re.compile(r'StartFragment:(\d+)')
RE_END_FRAGMENT = re.compile(r'EndFragment:(\d+)')
# A newline that is not directly preceded by "<br/>"; save_content() uses it to
# insert "<br/>" before such newlines without doubling existing tags.
RE_NEWLINE_CONVERSION = re.compile(r'(?<!<br/>)\n')

if ENABLE_NOTE:
    def save_content(new_content, new_group, word, selected_note_file, match):
        """Store the note of ``word`` in the note file of ``new_group``.

        Args:
            new_content: Note text from the editor. Newlines are stored as
                ``<br/>`` followed by a newline.
            new_group: Target group name; empty or ``'[Default]'`` selects
                DEFAULT_NOTE_FILE.
            word: Headword the note belongs to.
            selected_note_file: File the note was loaded from.
            match: Unused; kept so existing callers keep working.

        Behavior:
            * Non-empty content replaces the entry of ``word`` in the target
              file when one exists there (previously a second, duplicate
              entry was appended when the target differed from
              ``selected_note_file``), otherwise it is appended.
            * Empty content deletes the entry (and any duplicates of it) when
              the target is the file the note came from; with a different
              target nothing is written.

        Returns:
            True on success, False when the file could not be written at all
            or an unexpected error occurred. A write that only succeeded in
            the fallback folder is logged and still reported as success.
        """
        try:
            # Convert newlines to <br/> tags without doubling tags that already exist.
            if '\n' in new_content:
                new_content = new_content.replace('\r\n', '\n').replace('\r', '\n')
                new_content = RE_NEWLINE_CONVERSION.sub('<br/>\n', new_content)

            target_note_file = f"GoldenDict-MyNote-{new_group}.txt" if new_group and new_group != '[Default]' else DEFAULT_NOTE_FILE
            has_text = bool(new_content.strip())

            if not has_text and target_note_file != selected_note_file:
                # Nothing to store in the other group; the original entry stays untouched.
                return True

            pattern = build_note_pattern(word)
            note_content = get_note_content(target_note_file)
            existing = re.search(pattern, note_content, re.DOTALL)

            if has_text:
                new_entry = f"{word}\n<font size=5>{word}</font>\n<p class=\"senselevel-1\">{new_content}</p>\n</>"
                if existing:
                    note_content = note_content[:existing.start()] + new_entry + note_content[existing.end():]
                else:
                    note_content = note_content.rstrip() + f"\n{new_entry}"
            else:
                # Delete every entry of the word, taking one adjacent newline with
                # it so no empty line is left behind. Working on the matched span
                # also removes an entry that is the only content of the file.
                while existing:
                    before = note_content[:existing.start()]
                    after = note_content[existing.end():]
                    if after.startswith('\n'):
                        after = after[1:]
                    elif before.endswith('\n'):
                        before = before[:-1]
                    note_content = before + after
                    existing = re.search(pattern, note_content, re.DOTALL)

            save_path = os.path.join(NOTE_FOLDER, target_note_file)
            written_path = safe_write_file(save_path, note_content)
            if written_path is None:
                # Neither the note folder nor the fallback folder could be written.
                logging.error(f"Note for {word!r} could not be written to {save_path}")
                return False
            if written_path != save_path:
                logging.error(f"Note file {save_path} was not writable; content saved to {written_path}")
            set_note_content(target_note_file, note_content)
            return True
        except Exception as e:
            logging.error(f"Error saving note: {e}")
            return False
        
        

# 定义编辑界面
if ENABLE_NOTE:
    def edit_interface():
        global selected_note_file
        root = tk.Tk()
        root.title(f"Edit Note for '{word}'")
        root.geometry(get_window_geometry())  # 设置窗口大小和位置
        root.resizable(True, True)  # 允许调整大小
        # 解决“expected integer但 got UI”问题，Segoe UI 名称包含空格，需使用花括号
        root.option_add("*Font", "{Segoe UI} 9")  # 统一小字体，视觉紧凑
        root.configure(bg="#f5f5f5")

        # 主容器
        frame = tk.Frame(root, bg="#f5f5f5")
        frame.pack(fill=tk.BOTH, expand=True, padx=8, pady=8)

        # 沉浸式文本编辑区
        text_frame = tk.Frame(frame, bg="#ffffff")
        text_frame.pack(fill=tk.BOTH, expand=True)

        # 添加垂直滚动条
        scrollbar = tk.Scrollbar(text_frame)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # 文本区域
        text_area = tk.Text(text_frame, wrap=tk.WORD, yscrollcommand=scrollbar.set,
                            font=("Consolas", 11), padx=10, pady=10, spacing1=2, spacing3=2,
                            bd=0, bg="#ffffff", fg="#222")
        text_area.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.config(command=text_area.yview)

        # 如果内容超过较大阈值，关闭 undo 以减少内存负担
        raw_text = re.sub(r'<br\s*/?>', '', result) if result_flag else ''
        if len(raw_text) > MAX_TEXT_UNDO_SIZE:
            text_area.config(undo=False, maxundo=-1, autoseparators=False)
        else:
            text_area.config(undo=True)

        # 初始大文本分段加载：只展示前TEXT_SIZE_THRESHOLD字符，避免卡顿
        # 分片加载：避免一次性显示超长文本导致卡顿
        is_lazy_mode = False
        is_full_loaded = True
        is_chunk_mode = False
        chunks = []
        current_chunk = 0
        chunk_count = 1
        loaded_chars = len(raw_text)
        chunk_nav_label = None

        if len(raw_text) > CHUNK_MODE_THRESHOLD:
            is_chunk_mode = True
            chunk_size = CHUNK_LOAD_STEP
            chunks = [raw_text[i:i + chunk_size] for i in range(0, len(raw_text), chunk_size)]
            chunk_count = len(chunks)
            current_chunk = 0

            text_area.insert("1.0", chunks[current_chunk])
            text_area.config(undo=False, maxundo=-1, autoseparators=False)

            def switch_chunk(new_index):
                nonlocal current_chunk
                # 保存当前块
                chunks[current_chunk] = text_area.get("1.0", "end").rstrip("\n")
                current_chunk = new_index
                text_area.delete("1.0", "end")
                text_area.insert("1.0", chunks[current_chunk])
                if chunk_nav_label:
                    chunk_nav_label.config(text=f"{current_chunk + 1}/{chunk_count} 片")

            def prev_chunk():
                if current_chunk > 0:
                    switch_chunk(current_chunk - 1)

            def next_chunk():
                if current_chunk < chunk_count - 1:
                    switch_chunk(current_chunk + 1)

            def merge_text():
                """Return the whole note: all chunks joined, including the edits currently on screen."""
                # "end-1c" leaves out only the newline Tk appends automatically, so
                # newlines that end the current chunk survive the join.
                chunks[current_chunk] = text_area.get("1.0", "end-1c")
                return "".join(chunks)

            def clear_all_chunks():
                nonlocal chunks, chunk_count, current_chunk
                chunks = [""]
                chunk_count = 1
                current_chunk = 0
                text_area.delete("1.0", "end")
                if chunk_nav_label:
                    chunk_nav_label.config(text="1/1 片")

            text_area.focus_set()

        elif len(raw_text) > TEXT_LAZY_LOAD_SIZE:
            is_lazy_mode = True
            is_full_loaded = False
            loaded_chars = TEXT_LAZY_LOAD_SIZE
            text_area.insert("1.0", raw_text[:loaded_chars])

            lazy_load_tip = tk.Label(frame, text=f"当前笔记内容较大（{len(raw_text)} 字符），已显示前 {loaded_chars} 字符；可点击加载更多。", fg="orange")
            lazy_load_tip.pack(pady=2)

            def load_more_text():
                nonlocal loaded_chars, is_full_loaded
                end_pos = min(len(raw_text), loaded_chars + CHUNK_LOAD_STEP)
                text_area.insert("end", raw_text[loaded_chars:end_pos])
                loaded_chars = end_pos
                if loaded_chars >= len(raw_text):
                    is_full_loaded = True
                    lazy_load_tip.config(text=f"已加载全部 {len(raw_text)} 字符。")
                    btn_load_more.config(state=tk.DISABLED)
                else:
                    lazy_load_tip.config(text=f"已加载 {loaded_chars}/{len(raw_text)} 字符（可继续加载）。")

            btn_load_more = tk.Button(frame, text="加载更多", command=load_more_text)
            btn_load_more.pack(pady=2)
        else:
            text_area.insert("1.0", raw_text)

        text_area.focus_set()

        # 定义粘贴模式变量，实际控件会在工具栏与 B/I/U 等按钮放在同一行
        clean_mode_var = tk.StringVar(value=get_last_paste_mode())

        # 使用正则表达式替换 <br>, <br/> 和 <br />

        # 获取剪切板HTML内容
        def get_clipboard_html(mode='safe'):
            """Return the clipboard content prepared for insertion into the editor.

            Args:
                mode: ``'safe'`` sanitizes the HTML fragment (comments, scripts,
                    styles and similar elements are removed; only ``class``/``id``
                    on list tags, a non-script ``href`` on links and
                    ``src``/``alt`` on images survive). ``'raw'`` returns the
                    fragment without the CF_HTML header and markers but
                    otherwise untouched. ``'plain'`` prefers the plain-text
                    clipboard format. Any other value is treated like ``'safe'``.

            Returns:
                The text to insert, or ``None`` when the clipboard cannot be
                opened or holds neither HTML nor text.

            Notes:
                * The StartFragment/EndFragment header values are byte offsets,
                  so the fragment is cut from the raw bytes before decoding.
                * When the clipboard only offers plain text, the selected mode
                  is honored: ``'plain'`` returns the text as is, ``'raw'``
                  returns it stripped, ``'safe'`` sanitizes it.
            """
            start_marker = '<!--StartFragment-->'
            end_marker = '<!--EndFragment-->'

            def decode_clipboard_bytes(raw):
                """Decode clipboard bytes as UTF-8, falling back to cp936 (GBK)."""
                if not isinstance(raw, bytes):
                    return raw
                try:
                    return raw.decode('utf-8')
                except UnicodeDecodeError:
                    return raw.decode('cp936', errors='replace')

            def fragment_by_offsets(payload):
                """Cut out the fragment addressed by the CF_HTML header, or return None.

                The offsets count bytes of the UTF-8 payload. Applying them to the
                decoded string moves the window as soon as the content contains
                non-ASCII characters, so the slice is taken on bytes.
                """
                if isinstance(payload, bytes):
                    raw = payload
                else:
                    raw = payload.encode('utf-8', errors='replace')
                m_start = re.search(rb'StartFragment:(\d+)', raw)
                m_end = re.search(rb'EndFragment:(\d+)', raw)
                if not (m_start and m_end):
                    return None
                start = int(m_start.group(1))
                end = int(m_end.group(1))
                if not 0 <= start < end <= len(raw):
                    return None
                return decode_clipboard_bytes(raw[start:end])

            def fragment_by_markers(text):
                """Return the text between the fragment comment markers, or None."""
                begin = text.find(start_marker)
                finish = text.find(end_marker)
                if begin != -1 and finish != -1 and finish > begin:
                    return text[begin + len(start_marker):finish]
                return None

            def extract_raw_fragment(payload):
                """Return the HTML fragment without header metadata and fragment markers."""
                if not isinstance(payload, (str, bytes)):
                    return payload

                # 1) numeric StartFragment/EndFragment header fields
                fragment = fragment_by_offsets(payload)
                if fragment is not None:
                    return fragment.replace(start_marker, '').replace(end_marker, '').strip()

                # 2) comment markers
                text = decode_clipboard_bytes(payload)
                fragment = fragment_by_markers(text)
                if fragment is not None:
                    return fragment.strip()

                # 3) a complete HTML document, else the text itself
                whole_document = RE_HTML_INTEGRITY.search(text)
                if whole_document:
                    return whole_document.group(0).strip()
                return text.strip()

            def get_plain_clipboard_text():
                """Return the clipboard's plain text (Unicode first, then ANSI), or None."""
                for clipboard_format in (win32con.CF_UNICODETEXT, win32con.CF_TEXT):
                    if not win32clipboard.IsClipboardFormatAvailable(clipboard_format):
                        continue
                    try:
                        text = win32clipboard.GetClipboardData(clipboard_format)
                    except Exception as e:
                        logging.error(f"Error reading clipboard format {clipboard_format}: {e}")
                        continue
                    text = decode_clipboard_bytes(text)
                    if isinstance(text, str):
                        return text
                return None

            def normalize_breaks(text):
                """Turn newlines into <br>, collapse <br> runs and drop control characters."""
                text = text.replace('\n', '<br>')
                text = re.sub(r'(<br\s*/?\>\s*)+', '<br>', text, flags=re.IGNORECASE)
                return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]', '', text)

            def cleanup_fragment(text, mode='safe'):
                """Clean an HTML fragment: 'raw' only extracts it, anything else sanitizes it."""
                if text is None:
                    return None

                if mode == 'raw':
                    return extract_raw_fragment(text)

                # Long input: regex-only cleanup to keep the UI responsive.
                if len(text) > 10000:
                    text = RE_HTML_COMMENTS.sub('', text)
                    text = RE_BAD_TAGS.sub('', text)
                    text = RE_BAD_TAGS_SELF.sub('', text)
                    # Drop the <html>/<body> structure tags but keep their content.
                    text = re.sub(r'</?html[^>]*>', '', text, flags=re.IGNORECASE)
                    text = re.sub(r'</?body[^>]*>', '', text, flags=re.IGNORECASE)
                    return normalize_breaks(text).strip()

                try:
                    soup = BeautifulSoup(text, 'html.parser')

                    # Remove HTML comments (e.g. <!--StartFragment-->).
                    for comment in soup.find_all(string=lambda node: isinstance(node, Comment)):
                        comment.extract()

                    # Remove unwanted elements together with their content.
                    for bad in ['meta', 'style', 'script', 'head', 'title', 'link', 'iframe', 'svg', 'noscript']:
                        for tag in soup.find_all(bad):
                            tag.decompose()

                    # Drop the <html>/<body> wrappers but keep their children.
                    html_tag = soup.find('html')
                    if html_tag:
                        html_tag.unwrap()
                    body_tag = soup.find('body')
                    if body_tag:
                        body_tag.unwrap()

                    # Keep the tag structure; strip every attribute that is not allow-listed.
                    for tag in soup.find_all():
                        safe_attrs = {}
                        if tag.name in ['ul', 'ol', 'li', 'dl', 'dt', 'dd']:
                            for attr, value in tag.attrs.items():
                                if attr in ['class', 'id']:
                                    safe_attrs[attr] = value
                        elif tag.name == 'a':
                            href = tag.get('href')
                            if href:
                                href = href.strip()
                                if not href.lower().startswith(('javascript:', 'data:', 'vbscript:')):
                                    safe_attrs['href'] = href
                        elif tag.name == 'img':
                            src = tag.get('src')
                            alt = tag.get('alt')
                            if src and not src.strip().lower().startswith(('javascript:', 'data:', 'vbscript:')):
                                safe_attrs['src'] = src.strip()
                            if alt:
                                safe_attrs['alt'] = alt
                        tag.attrs = safe_attrs

                    return normalize_breaks(soup.decode_contents()).rstrip()

                except Exception as e:
                    # Best effort: keep the paste working, but leave a trace in the log.
                    logging.error(f"HTML cleanup failed, inserting unprocessed text: {e}")
                    return text.strip() if text else None

            try:
                win32clipboard.OpenClipboard()
            except Exception as e:
                # Typically another process holds the clipboard; the caller falls
                # back to the default paste when None is returned.
                logging.error(f"Unable to open the clipboard: {e}")
                return None

            try:
                # Standard "HTML Format" (CF_HTML) clipboard format.
                html_format = win32clipboard.RegisterClipboardFormat('HTML Format')
                payload = None
                if win32clipboard.IsClipboardFormatAvailable(html_format):
                    try:
                        payload = win32clipboard.GetClipboardData(html_format)
                    except Exception as e:
                        logging.error(f"Error reading HTML clipboard data: {e}")
                        payload = None

                if payload and isinstance(payload, (bytes, str)):
                    if mode == 'raw':
                        return extract_raw_fragment(payload)

                    if mode == 'plain':
                        return get_plain_clipboard_text() or extract_raw_fragment(payload)

                    # Safe mode: header offsets first, then comment markers, then
                    # a complete document, finally the whole payload.
                    fragment = fragment_by_offsets(payload)
                    if fragment is None:
                        text = decode_clipboard_bytes(payload)
                        fragment = fragment_by_markers(text)
                        if fragment is None:
                            whole_document = RE_HTML_INTEGRITY.search(text)
                            fragment = whole_document.group(0) if whole_document else text
                    return cleanup_fragment(fragment, mode)

                # No HTML on the clipboard: fall back to plain text.
                text = get_plain_clipboard_text()
                if not text:
                    return None
                if mode == 'plain':
                    return text
                return cleanup_fragment(text, mode)
            finally:
                win32clipboard.CloseClipboard()


        # 自定义粘贴函数
        def custom_paste(event):
            """Handle Ctrl+V: insert the clipboard content according to the selected paste mode.

            Falls back to Tk's default paste when nothing usable was obtained from
            the clipboard. Always returns ``'break'`` so the default binding does
            not paste a second time.
            """
            html = get_clipboard_html(clean_mode_var.get())
            if html:
                text_area.insert(tk.INSERT, html)
            else:
                # Default plain-text paste.
                text_area.event_generate('<<Paste>>')
            return 'break'

        # 绑定Ctrl+V到自定义粘贴
        text_area.bind('<Control-v>', custom_paste)


        status_label = tk.Label(frame, text="准备就绪", fg="green", bg="#eef", bd=1, relief=tk.SOLID)
        # 状态显示（编辑中/已保存）暂不单独占行，后面与分组控件同一行显示

        def save_content_sync(new_content, new_group, paste_mode, window_geometry):
            success = save_content(new_content, new_group, word, selected_note_file, match)
            if success:
                save_last_group(new_group if new_group else '[Default]')
                save_last_paste_mode(paste_mode)
                save_window_geometry(window_geometry)
            return success

        def save_current_content():
            """Save the editor content together with the group, paste mode and window geometry.

            Returns:
                True when the note was saved, False when saving was refused
                (lazily loaded note not fully loaded yet) or failed.
            """
            new_group = group_entry.get().strip() or group_var.get().strip()
            paste_mode = clean_mode_var.get()
            window_geometry = root.geometry()

            if is_lazy_mode and not is_full_loaded:
                messagebox.showwarning("请先加载全部", "大笔记尚未加载完成；请点“加载全部”后再保存以避免文本丢失。")
                return False

            if is_chunk_mode:
                # Store the chunk on screen first. "end-1c" leaves out only the
                # newline Tk appends automatically, so newlines that end a chunk
                # are kept and lines are not merged across chunk boundaries.
                chunks[current_chunk] = text_area.get("1.0", "end-1c")
                # Trim the ends of the merged note like the non-chunk path does.
                new_content = "".join(chunks).strip()
            else:
                new_content = text_area.get("1.0", "end").strip()

            status_label.config(text="保存中...")
            if not save_content_sync(new_content, new_group, paste_mode, window_geometry):
                status_label.config(text="保存失败")
                return False

            status_label.config(text=f"已保存 ({len(new_content)} 字符)")
            return True

        def on_save():
            """Save the note; on success close the editor and print the redirect.

            When ``save_current_content`` fails the window stays open and
            nothing is printed.
            """
            if save_current_content():
                root.destroy()
                # The printed script replaces the page location with the word.
                print(f'<script>window.location.replace({json.dumps(word)});</script>')

        # Ctrl+S runs the same save-and-close action.
        root.bind('<Control-s>', lambda _event: on_save())

        # 绑定 Ctrl + Z 快捷键触发撤销
        def on_undo(event=None):
            """Undo the most recent edit in the text area.

            Args:
                event: Optional Tk event (unused).

            Returns:
                ``'break'``, so that Tk skips the remaining bindings for the
                key press once this handler has run.
            """
            try:
                text_area.edit_undo()
            except tk.TclError:
                # Raised when there is nothing left to undo; safe to ignore.
                pass
            return 'break'

        # Bind on the text widget itself. Tk's Text class already maps Ctrl+Z to
        # its own <<Undo>> handling and runs before window-level bindings, so a
        # binding on the window alone undoes twice per key press. A widget-level
        # binding runs first and 'break' suppresses the later ones.
        text_area.bind('<Control-z>', on_undo)
        # Keep the window-level binding for key presses made while another
        # widget has the focus.
        root.bind('<Control-z>', on_undo)


        
        # 绑定窗口关闭事件
        def on_close():
            """Close the editor without saving the note text.

            The window geometry is stored, the window is destroyed, and the
            redirect script for the current word is printed.
            """
            geometry = root.geometry()
            save_window_geometry(geometry)

            root.destroy()

            # No note content is written on this path; only the redirect is emitted.
            print(f'<script>window.location.replace({json.dumps(word)});</script>')

        # Run on_close when the window's close button is used.
        root.protocol("WM_DELETE_WINDOW", on_close)
        

        # ===== UI按钮功能优化 =====
        def get_selection_range():
            """Return the start and end indices of the current selection.

            Returns:
                A pair of index strings: the selection bounds when text is
                selected, otherwise the insertion cursor index twice.
            """
            if not text_area.tag_ranges(tk.SEL):
                caret = text_area.index(tk.INSERT)
                return caret, caret
            sel_start = text_area.index(tk.SEL_FIRST)
            sel_end = text_area.index(tk.SEL_LAST)
            return sel_start, sel_end

        def insert_html_tag(start_idx, end_idx, tag_name, closing_tag=None):
            """Wrap the text between two indices in an opening/closing string pair.

            Args:
                start_idx: Index where the wrapped range starts.
                end_idx: Index where the wrapped range ends.
                tag_name: Text inserted before the range.
                closing_tag: Text inserted after the range; defaults to
                    ``tag_name`` when None.

            When the range is empty, the pair is inserted at ``start_idx`` and
            the insertion cursor is set back to ``start_idx``.
            """
            closer = tag_name if closing_tag is None else closing_tag
            inner = text_area.get(start_idx, end_idx)
            if not inner:
                text_area.insert(start_idx, f"{tag_name}{closer}")
                text_area.mark_set(tk.INSERT, start_idx)
                return
            text_area.delete(start_idx, end_idx)
            text_area.insert(start_idx, f"{tag_name}{inner}{closer}")

        def insert_simple_tag(tag):
            """Wrap the current selection (or cursor position) in ``<tag>…</tag>``.

            Args:
                tag: Element name without angle brackets, e.g. ``'b'``.
            """
            first, last = get_selection_range()
            opening = f"<{tag}>"
            closing = f"</{tag}>"
            insert_html_tag(first, last, opening, closing)

        # Toolbar actions for the simple inline tags.
        def add_bold():
            """Wrap the selection in <b>…</b>."""
            insert_simple_tag('b')

        def add_italic():
            """Wrap the selection in <i>…</i>."""
            insert_simple_tag('i')

        def add_underline():
            """Wrap the selection in <u>…</u>."""
            insert_simple_tag('u')

        def add_strike():
            """Wrap the selection in <s>…</s>."""
            insert_simple_tag('s')

        def _bind_format_shortcut(sequence, action):
            """Bind a formatting shortcut on the text area and on the window.

            A window-level binding runs after the Text class bindings. Tk's
            Text class treats Control-b like the Left key (moves the cursor and
            clears the selection), so by the time a window-only binding ran
            there was no selection left to wrap. The widget-level binding runs
            first and returns 'break' to suppress the class binding.
            """
            def run_action(event):
                action()
                return 'break'

            text_area.bind(sequence, run_action)
            # Still available when another widget has the focus.
            root.bind(sequence, run_action)

        # Ctrl+B / Ctrl+I / Ctrl+U shortcuts.
        _bind_format_shortcut('<Control-b>', add_bold)
        _bind_format_shortcut('<Control-i>', add_italic)
        _bind_format_shortcut('<Control-u>', add_underline)

        # 添加 专名 按钮（带下划线的文本）
        def add_proper_name():
            """Wrap the selection in the proper-name span (solid bottom border).

            Uses the shared ``insert_html_tag`` helper instead of repeating its
            delete/insert/cursor logic. With no selection, an empty span is
            inserted and the cursor is placed at the insertion point.
            """
            start_idx, end_idx = get_selection_range()
            insert_html_tag(
                start_idx,
                end_idx,
                '<span style="border-bottom: 1px solid; margin: 0 1px;">',
                '</span>',
            )

        # 添加 书名 按钮（带波浪下划线的文本）
        def add_book_title():
            """Wrap the selection in the book-title span (wavy underline).

            Uses the shared ``insert_html_tag`` helper instead of repeating its
            delete/insert/cursor logic. With no selection, an empty span is
            inserted and the cursor is placed at the insertion point.
            """
            start_idx, end_idx = get_selection_range()
            insert_html_tag(
                start_idx,
                end_idx,
                '<span style="text-decoration: underline; text-decoration-style: wavy; text-decoration-thickness: 1px; text-underline-offset: 3px; margin: 0 1px;">',
                '</span>',
            )

        # 添加 href 按钮（超链接）
        def add_href():
            """Turn the selection into an ``entry://`` link, or insert a sample link.

            The selected text is stripped of surrounding whitespace; spaces in
            the link target are written as ``%20``. When nothing (or only
            whitespace) is selected, a placeholder link is inserted and the
            cursor is set to the insertion point.
            """
            start_idx, end_idx = get_selection_range()
            link_text = text_area.get(start_idx, end_idx).strip()

            if not link_text:
                text_area.insert(start_idx, '<a href="entry://EXAMPLE#section">EXAMPLE</a>')
                text_area.mark_set(tk.INSERT, start_idx)
                return

            target = link_text.replace(" ", "%20")
            text_area.delete(start_idx, end_idx)
            text_area.insert(start_idx, f'<a href="entry://{target}#section">{link_text}</a>')

        
        # Let the user move or copy text by dragging the selection.
        # State shared by the drag handlers defined below.
        content = ""  # Selected text captured on mouse-down; empty when no drag is pending
        draging = False  # Set to True by handledrag once the mouse moves with content captured
        ctrl = False  # Toggled by ctrldown/ctrlup; when True, handleup keeps the original text
        sel = None  # (start, end) indices of the selection captured on mouse-down

        def ctrlup(e):
            nonlocal ctrl
            if e.keycode == 17:
                ctrl = False

        def ctrldown(e):
            nonlocal ctrl
            if e.keycode == 17:
                ctrl = True

        def handledown(e):
            nonlocal content, sel
            try:
                if insel(e):
                    content = text_area.get("sel.first", "sel.last")
                    sel = (text_area.index("sel.first"), text_area.index("sel.last"))  # 保存选择的区域
                    return "break"
            except:
                content = ""

        def handledrag(e):
            nonlocal draging, content
            if content:
                draging = True
                text_area.mark_set("insert", "@%d,%d" % (e.x, e.y))
                return "break"

        def handleup(e):
            nonlocal draging, content, sel
            if draging:
                if not ctrl:
                    # 删除被选中的内容
                    text_area.delete(*sel)
                text_area.insert("insert", content)  # 插入剪切的内容
                draging = False
                content = ""
            else:
                if content:
                    text_area.tag_remove("sel", "1.0", "end")
                    content = ""
                    text_area.mark_set("insert", "@%d,%d" % (e.x, e.y))

        def insel(e):
            """Report whether the pointer position of ``e`` lies inside the selection.

            The position must be strictly between ``sel.first`` and
            ``sel.last``. Indices are compared as ``(line, column)`` tuples, so
            a selection confined to one line is bounded on both sides. The
            previous per-line checks accepted any click on that line.

            Args:
                e: The Tk mouse event; its ``x``/``y`` are widget coordinates.

            Returns:
                True when the pointer is inside the selection. False otherwise,
                including when there is no selection.
            """
            def as_pair(index):
                line, column = index.split(".")
                return int(line), int(column)

            try:
                first = as_pair(text_area.index("sel.first"))
                pointer = as_pair(text_area.index("@%d,%d" % (e.x, e.y)))
                last = as_pair(text_area.index("sel.last"))
            except (tk.TclError, ValueError):
                # TclError: nothing is selected. ValueError: unexpected index text.
                return False
            return first < pointer < last

        # Install the drag-and-drop handlers; skipped for very long text to
        # avoid sluggishness.
        if len(raw_text) > MAX_TEXT_UNDO_SIZE:
            # Large-text mode gives up drag-to-move for stability.
            logging.info("Large edit content - drag/drop text handling disabled")
        else:
            _drag_bindings = (
                ("<KeyPress>", ctrldown),
                ("<KeyRelease>", ctrlup),
                ("<B1-Motion>", handledrag),
                ("<Button-1>", handledown),
                ("<ButtonRelease-1>", handleup),
            )
            for _sequence, _callback in _drag_bindings:
                # The third argument adds to, rather than replaces, existing bindings.
                text_area.bind(_sequence, _callback, 1)
        
        
        # 添加图片并将图片保存到与分组同名的文件夹下
        def add_image():
            """Copy a user-chosen image into the group's folder and insert an <img> tag.

            The image is copied to ``NOTE_FOLDER/<group>/`` under the name
            ``<word><n><ext>``, where ``n`` is the first counter value that does
            not collide with an existing file. Characters that Windows forbids
            in file names are replaced with ``_`` in the headword, so such
            headwords no longer make the copy fail. Failures to create the
            folder or copy the file are logged and shown to the user.
            """
            import shutil

            # Ask the user for an image file.
            image_path = filedialog.askopenfilename(
                title="Select an Image",
                filetypes=[("Image Files", "*.png;*.jpg;*.jpeg;*.gif;*.bmp")]
            )
            if not image_path:
                return

            # The group name is derived from the note file being edited.
            if selected_note_file == DEFAULT_NOTE_FILE:
                group = 'Default'
            else:
                group = selected_note_file.replace('GoldenDict-MyNote-', '').replace('.txt', '')

            target_dir = os.path.join(NOTE_FOLDER, group)
            _, file_extension = os.path.splitext(image_path)

            # File names are based on the headword; strip characters that are
            # not allowed in Windows file names.
            base_name = re.sub(r'[\\/:*?"<>|]', '_', word)

            try:
                os.makedirs(target_dir, exist_ok=True)

                # Find the first unused "<word><n><ext>" name.
                counter = 1
                while True:
                    new_image_name = f"{base_name}{counter}{file_extension}"
                    target_path = os.path.join(target_dir, new_image_name)
                    if not os.path.exists(target_path):
                        break
                    counter += 1

                shutil.copy(image_path, target_path)
            except OSError as e:
                logging.error(f"Error copying image: {e}")
                messagebox.showerror("Error", f"Failed to copy image: {e}")
                return

            # Insert the HTML tag for the copied image at the cursor.
            start_idx = text_area.index(tk.INSERT)
            text_area.insert(start_idx, f'<img src="file://{target_path}" alt="Image" style="max-width:100%; height:auto;">\n')

        
        
        # Compact top toolbar.
        # Group names are derived from the note file names, excluding the default file.
        existing_groups = [f.replace('GoldenDict-MyNote-', '').replace('.txt', '') for f in note_files if f != DEFAULT_NOTE_FILE]
        existing_groups.insert(0, '[Default]')  # Shown as [Default]; presumably mapped back to the default file on save (mapping not visible here)

        last_remembered_group = get_last_group()  # Group remembered from the previous session
        # Initial group: the explicitly selected one, else the first group that
        # holds the note, else the remembered one.
        if selected_group:
            current_group = selected_group
        elif result_flag and len(all_group_names) > 0:
            current_group = all_group_names[0]
        else:
            current_group = last_remembered_group

        group_var = tk.StringVar(value=current_group)

        # The bar is packed before text_frame so it sits above the editor.
        top_bar = tk.Frame(frame, bg="#ffffff", bd=0, highlightthickness=1, highlightbackground="#ddd")
        top_bar.pack(side=tk.TOP, fill=tk.X, pady=(0, 10), before=text_frame)

        # Shared look for the flat toolbar buttons.
        btn_style = {"relief": tk.FLAT, "bg": "#ffffff", "activebackground": "#eee", "padx": 6, "pady": 4, "font": ("Segoe UI", 10)}

        tools_frame = tk.Frame(top_bar, bg="#ffffff")
        tools_frame.pack(side=tk.LEFT, padx=5)

        # Button table (label, callback); avoids repeating the Button call.
        buttons_defs = [
            ("B", add_bold), ("I", add_italic), ("U", add_underline), ("S", add_strike),
            ("🔗", add_href), ("🖋", add_proper_name), ("📖", add_book_title), ("🖼", add_image),
        ]
        for label, cmd in buttons_defs:
            tk.Button(tools_frame, text=label, **btn_style, command=cmd).pack(side=tk.LEFT)

        # Thin vertical separator between the tool buttons and the paste controls.
        divider = tk.Frame(top_bar, width=1, bg="#ddd", height=26)
        divider.pack(side=tk.LEFT, padx=10, pady=6)

        # Paste-mode selector.
        paste_frame = tk.Frame(top_bar, bg="#ffffff")
        paste_frame.pack(side=tk.LEFT)
        tk.Label(paste_frame, text="📋", bg="#ffffff").pack(side=tk.LEFT)
        paste_menu = tk.OptionMenu(paste_frame, clean_mode_var, 'safe', 'raw', 'plain')
        paste_menu.config(relief=tk.FLAT, bg="#ffffff", width=5, activebackground="#eee")
        paste_menu.pack(side=tk.LEFT, padx=2)
        # NOTE(review): this label is created without any text - confirm whether
        # a hint was meant to be shown here.
        tk.Label(paste_frame, fg="#666", bg="#ffffff", font=("Segoe UI", 8)).pack(side=tk.LEFT, padx=4)

        # Group controls on the right: drop-down of existing groups, free-text
        # entry, and the save button.
        group_frame = tk.Frame(top_bar, bg="#ffffff")
        group_frame.pack(side=tk.RIGHT, padx=5)
        tk.Label(group_frame, text="分组:", bg="#ffffff").pack(side=tk.LEFT)
        group_dropdown = tk.OptionMenu(group_frame, group_var, *existing_groups)
        group_dropdown.config(relief=tk.FLAT, bg="#ffffff", width=10, activebackground="#eee")
        group_dropdown.pack(side=tk.LEFT, padx=2)
        group_entry = tk.Entry(group_frame, width=12, relief=tk.FLAT, bg="#fafafa", fg="#222")
        group_entry.pack(side=tk.LEFT, padx=2)
        save_btn_style = btn_style.copy()
        save_btn_style.update({"bg": "#4CAF50", "fg": "white"})
        tk.Button(group_frame, text="💾", **save_btn_style, command=on_save).pack(side=tk.LEFT, padx=6)

        # --- Minimal bottom status bar (everything on one row) ---
        bottom_bar = tk.Frame(frame, bg="#f5f5f5")
        bottom_bar.pack(side=tk.BOTTOM, fill=tk.X, pady=(2, 0))

        # Left container: holds all action buttons and the chunk navigation.
        controls_left = tk.Frame(bottom_bar, bg="#f5f5f5")
        controls_left.pack(side=tk.LEFT, fill=tk.X)

        if is_chunk_mode:
            # Chunk navigation buttons.
            nav_btn_style = {"relief": tk.FLAT, "bg": "#ffffff", "padx": 3, "font": ("Segoe UI", 8)}

            tk.Button(controls_left, text="❮", **nav_btn_style, command=prev_chunk).pack(side=tk.LEFT, padx=1)
            chunk_nav_label = tk.Label(controls_left, text=f"{current_chunk + 1}/{chunk_count} 片",
                                       bg="#f5f5f5", fg="#555", font=("Segoe UI", 8))
            chunk_nav_label.pack(side=tk.LEFT, padx=3)
            tk.Button(controls_left, text="❯", **nav_btn_style, command=next_chunk).pack(side=tk.LEFT, padx=1)

            # Action buttons follow the navigation.
            # The lambda builds a list only to run two calls in sequence: store
            # the visible text into the current chunk, then update the label.
            tk.Button(controls_left, text="保存当前块", **nav_btn_style,
                      command=lambda: [chunks.__setitem__(current_chunk, text_area.get("1.0", "end").rstrip("\n")),
                                       chunk_nav_label.config(text="已保存!")]).pack(side=tk.LEFT, padx=5)

            tk.Button(controls_left, text="清空全部", relief=tk.FLAT, bg="#fee2e2", fg="#dc2626",
                      font=("Segoe UI", 8), padx=3, command=clear_all_chunks).pack(side=tk.LEFT, padx=1)

        # Hand control to Tk; returns once the window has been destroyed.
        root.mainloop()

# Defaults for the values interpolated into the output page.
word_group = result_temp = output_html = ""

# Label naming the group the note came from; only for non-default note files
# when no group was selected explicitly.
if ENABLE_NOTE and not selected_group and selected_note_file != DEFAULT_NOTE_FILE:
    word_group = "[{}]    ".format(
        selected_note_file.replace('GoldenDict-MyNote-', '').replace('.txt', '')
    )

# Note text to display, when a note was found.
if ENABLE_NOTE and 'result' in locals():
    # Remove the HTML fragment markers from the displayed text.
    result_temp = re.sub(
        r'<!--EndFragment-->', '',
        re.sub(r'<!--StartFragment-->', '', result),
    )

    # Optional: output font setting (uncomment to enable).
    # result_temp = f'<span style="font-family: \'LXGW WenKai GB\', \'Simsun\';">{result_temp}</span>'

    # Optional: Japanese font setting for quoted text (uncomment to enable).
    # result_temp = result_temp.replace('「', '<span style="font-family: \'Klee One\', \'Yu Gothic\', \'Yu Mincho\';">「').replace('」', '」</span>')
    # result_temp = result_temp.replace('『', '<span style="font-family: \'Klee One\', \'Yu Gothic\', \'Yu Mincho\';">『').replace('』', '』</span>')

# Build the HTML page to output.
# With notes enabled and no group selected, the page shows the query output,
# the note text and edit link(s); otherwise only the query output.
if ENABLE_NOTE and not selected_group:
    if len_results < 2:  # Note not found, or present in only one group
        output_html = f"""
            <html>
            <body>
            <p>{query_output}</p>
            <p>{result_temp}</p>
            <p>{word_group}<a href="{word}?edit=true" id="edit_link">Edit my note</a></p>
            </body>
            </html>
            """
    else:  # Note present in several groups
        # One edit link per group, addressed as "<word>=<group>?edit=true".
        links = ", ".join(
            f'<a href="{word}={group_name}?edit=true" id="edit_link">[{group_name}]</a>'
            for group_name in all_group_names
        )

        output_html = f"""
            <html>
            <body>
            <p>{query_output}</p>
            <p>{result_temp}</p>
            <p>Edit my note in group:
            {links}</p>
            </body>
            </html>
            """
else:
    output_html = f"""
    <html>
    <body>
    <p>{query_output}</p>
    </body>
    </html>
    """


# Print the HTML page unless the query asked for edit mode (per the original
# note: the query contains ?edit=true) with notes enabled; in that case open
# the Tkinter editor instead.
if not (is_edit_mode and ENABLE_NOTE):
    print(output_html)
else:
    edit_interface()