import struct
import os
import argparse
import sys
from functools import cmp_to_key
from datetime import datetime

def stardict_strcmp(s1, s2):
    """Compare two words using the ordering StarDict expects in .idx/.syn files.

    The reference comparison is ``g_ascii_strcasecmp`` followed by ``strcmp``
    as a tie-breaker. Both C routines operate on raw bytes, and the first one
    folds only the ASCII letters A-Z. To reproduce that, the words are
    compared as UTF-8 byte strings and case-folded with ``bytes.lower()``,
    which leaves every non-ASCII byte untouched (``str.lower()`` would also
    fold letters such as "É" and produce a different order).

    Args:
        s1: First word, as ``str``.
        s2: Second word, as ``str``.

    Returns:
        -1 if ``s1`` sorts before ``s2``, 1 if it sorts after, 0 if both
        words are identical.
    """
    raw1 = s1.encode('utf-8')
    raw2 = s2.encode('utf-8')

    # Pass 1 (g_ascii_strcasecmp): ASCII-only case-insensitive byte comparison.
    folded1 = raw1.lower()
    folded2 = raw2.lower()
    if folded1 != folded2:
        return -1 if folded1 < folded2 else 1

    # Pass 2 (strcmp): case-sensitive byte comparison breaks the tie.
    if raw1 != raw2:
        return -1 if raw1 < raw2 else 1

    return 0

def parse_input_file(input_file):
    """Parse the source text file into a list of dictionary entries.

    Each usable line has the form ``word|synonym|...<TAB>definition``. The
    first ``|``-separated item is the headword and the remaining items are
    its synonyms. A literal backslash-n sequence in the definition is turned
    into a real newline. Lines that are blank, contain no tab, or have no
    non-empty word are reported on stdout and skipped.

    Args:
        input_file: Path of the UTF-8 text file to read. A leading byte order
            mark is accepted and ignored.

    Returns:
        A list of dicts with the keys ``'word'`` (str), ``'synonyms'``
        (list of str) and ``'definition'`` (str), in file order.
    """
    print(f"[*] 解析输入文件: {input_file}")
    entries = []
    # 'utf-8-sig' drops a leading BOM (common in files saved by Windows
    # editors) so it does not end up glued to the first headword; files
    # without a BOM decode exactly as with plain 'utf-8'.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or '\t' not in line:
                print(f"    [警告] 第 {line_num} 行格式错误，已跳过: {line}")
                continue

            # Only the first tab separates words from the definition; any
            # further tabs belong to the definition text.
            word_part, raw_definition = line.split('\t', 1)
            definition = raw_definition.replace('\\n', '\n')  # expand escaped newlines

            stripped_words = (piece.strip() for piece in word_part.split('|'))
            words = [word for word in stripped_words if word]
            if not words:
                print(f"    [警告] 第 {line_num} 行单词部分为空，已跳过: {line}")
                continue

            entries.append({
                'word': words[0],
                'synonyms': words[1:],
                'definition': definition
            })
    print(f"[*] 解析完成，共找到 {len(entries)} 个主词条。")
    return entries

def create_stardict(input_file, bookname, author="make_stardict.py script", description=""):
    """Generate a StarDict dictionary from a tab-separated text file.

    The input is parsed with ``parse_input_file`` and ordered with
    ``stardict_strcmp``. Four files are written, all using ``bookname`` as
    their path prefix:

    * ``<bookname>.dict`` - the UTF-8 definitions, concatenated in index order.
    * ``<bookname>.idx``  - per headword: NUL-terminated word, then 32-bit
      big-endian offset and size of its definition inside the .dict file.
    * ``<bookname>.syn``  - only when synonyms exist; per synonym:
      NUL-terminated synonym, then the 32-bit big-endian position of its
      headword in the .idx file.
    * ``<bookname>.ifo``  - the text metadata file.

    If parsing yields no entries, a message is printed and nothing is written.

    Args:
        input_file: Path of the source text file.
        bookname: Dictionary name; also used as the output file prefix.
        author: Value of the ``author`` field in the .ifo file.
        description: Value of the ``description`` field in the .ifo file.
            Line breaks are written as ``<br>``.

    Returns:
        None.

    Raises:
        struct.error: If an offset, size or index does not fit in an
            unsigned 32-bit integer.
    """
    output_prefix = bookname  # the book name doubles as the output file prefix
    dict_file_path = f"{output_prefix}.dict"
    idx_file_path = f"{output_prefix}.idx"
    syn_file_path = f"{output_prefix}.syn"
    ifo_file_path = f"{output_prefix}.ifo"

    # --- 1. Parse the source file ---
    parsed_entries = parse_input_file(input_file)
    if not parsed_entries:
        print("[!] 输入文件为空或格式不正确，无法生成字典。")
        return

    # --- 2. Sort headwords and synonyms ---
    print("[*] 正在对词条和同义词进行排序...")
    # Build the comparison-key wrapper once instead of once per element.
    sort_key = cmp_to_key(stardict_strcmp)

    # Sort entry positions rather than the entries themselves so that every
    # entry can be mapped to its final .idx position. sorted() is stable, so
    # duplicate headwords keep their input order.
    order = sorted(
        range(len(parsed_entries)),
        key=lambda pos: sort_key(parsed_entries[pos]['word']),
    )
    idx_index_of = [0] * len(parsed_entries)
    for idx_index, pos in enumerate(order):
        idx_index_of[pos] = idx_index

    # Each synonym records the .idx position of its own entry. Looking the
    # position up by word text would send every synonym of a duplicated
    # headword to the last duplicate.
    syn_records = [
        (synonym, idx_index_of[pos])
        for pos, entry in enumerate(parsed_entries)
        for synonym in entry['synonyms']
    ]
    syn_records.sort(key=lambda record: sort_key(record[0]))

    # --- 3. Write the .dict file and collect the .idx records ---
    print(f"[*] 正在生成: {output_prefix}.dict")
    idx_records = []
    current_offset = 0
    with open(dict_file_path, 'wb') as f_dict:
        for pos in order:
            entry = parsed_entries[pos]
            definition_bytes = entry['definition'].encode('utf-8')
            f_dict.write(definition_bytes)
            idx_records.append((entry['word'], current_offset, len(definition_bytes)))
            current_offset += len(definition_bytes)

    # --- 4. Write the .idx file ---
    print(f"[*] 正在生成: {output_prefix}.idx")
    with open(idx_file_path, 'wb') as f_idx:
        for word, offset, size in idx_records:
            # '!II' packs two unsigned 32-bit integers in network (big-endian)
            # byte order with no padding.
            f_idx.write(word.encode('utf-8') + b'\0' + struct.pack('!II', offset, size))

    # --- 5. Write the .syn file (only when synonyms exist) ---
    if syn_records:
        print(f"[*] 正在生成: {output_prefix}.syn")
        with open(syn_file_path, 'wb') as f_syn:
            for synonym, original_index in syn_records:
                f_syn.write(synonym.encode('utf-8') + b'\0' + struct.pack('!I', original_index))

    # --- 6. Write the .ifo file ---
    print(f"[*] 正在生成: {output_prefix}.ifo")
    # The .ifo file holds one key=value pair per line, so a raw line break in
    # the description would corrupt it; "<br>" is the marker the StarDict
    # format documentation gives for line breaks in this field.
    ifo_description = (
        description.replace('\r\n', '\n').replace('\r', '\n').replace('\n', '<br>')
    )

    ifo_content = [
        "StarDict's dict ifo file",
        "version=3.0.0",
        f"bookname={bookname}",
        f"wordcount={len(idx_records)}",
        f"idxfilesize={os.path.getsize(idx_file_path)}"
    ]
    if syn_records:
        ifo_content.append(f"synwordcount={len(syn_records)}")

    ifo_content.extend([
        "idxoffsetbits=32",
        f"author={author}",
        f"description={ifo_description}",
        f"date={datetime.now().strftime('%Y.%m.%d')}",
        # Every definition is plain text, so declare a single shared type 'm'.
        "sametypesequence=m"
    ])

    # newline='\n' keeps LF line endings on every platform.
    with open(ifo_file_path, 'w', encoding='utf-8', newline='\n') as f_ifo:
        f_ifo.write('\n'.join(ifo_content) + '\n')

    print("\n[+] StarDict 字典生成成功！")
    print("生成的文件如下:")
    print(f"  - {ifo_file_path}")
    print(f"  - {idx_file_path}")
    print(f"  - {dict_file_path}")
    # Report the .syn file only when this run wrote it; a leftover file from
    # an earlier run must not be listed as output.
    if syn_records:
        print(f"  - {syn_file_path}")
    print("\n提示: 为了获得最佳性能和兼容性，建议使用 `dictzip` 工具压缩 .dict 文件。")
    print(f"例如: dictzip {output_prefix}.dict")

if __name__ == '__main__':
    # Command-line entry point: read the arguments, make sure the input path
    # exists, then hand everything to create_stardict().
    cli = argparse.ArgumentParser(description="通过文本文件生成 StarDict 字典。")
    cli.add_argument(
        "input_file",
        help="输入的文本文件路径。格式：'单词1|同义词\t释义\\n第二行释义'",
    )
    cli.add_argument(
        "bookname",
        help="字典的名称 (Book Name)，也将作为输出文件的基本名称。",
    )
    cli.add_argument(
        "-a", "--author",
        default="make_stardict.py",
        help="字典作者信息。",
    )
    cli.add_argument(
        "-d", "--description",
        default="",
        help="字典描述信息。",
    )

    args = cli.parse_args()

    if os.path.exists(args.input_file):
        create_stardict(args.input_file, args.bookname, args.author, args.description)
    else:
        # Report a missing input on stderr and exit with status 1.
        print(f"[!] 错误: 输入文件 '{args.input_file}' 不存在。", file=sys.stderr)
        raise SystemExit(1)