// Command main converts a CSV phrase list ("word,code,order" per line) into a
// binary .dat file that starts with the "mschxudp" magic. Judging by the
// program's own prompts, the result is meant to be imported as a Microsoft
// Pinyin user-defined phrase file.
package main

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
	"unicode/utf16"
)

// Entry is one phrase read from the input CSV.
type Entry struct {
	Word  string // phrase text (first CSV column)
	Code  string // input code that triggers the phrase (second CSV column)
	Order int    // candidate position (third CSV column); only the low byte is written to the file
}

// encodeUTF16LE returns s encoded as UTF-16 little-endian bytes, without a
// BOM and without a terminator.
func encodeUTF16LE(s string) []byte {
	units := utf16.Encode([]rune(s))
	out := make([]byte, 2*len(units))
	for i, u := range units {
		binary.LittleEndian.PutUint16(out[2*i:], u)
	}
	return out
}

// getUint32 returns val as 4 little-endian bytes.
func getUint32(val uint32) []byte {
	buf := make([]byte, 4)
	binary.LittleEndian.PutUint32(buf, val)
	return buf
}

// getUint16 returns val as 2 little-endian bytes.
func getUint16(val uint16) []byte {
	buf := make([]byte, 2)
	binary.LittleEndian.PutUint16(buf, val)
	return buf
}

// gen serializes table into the binary phrase-file image.
//
// Layout produced (all integers little-endian):
//
//	0x00  16-byte magic/version string
//	0x10  uint32 0x40                 (presumably: start of the offset table)
//	0x14  uint32 0x40 + 4*len(table)  (presumably: start of the records)
//	0x18  uint32 total file length    (patched in after everything is written)
//	0x1c  uint32 number of entries
//	0x20  uint32 Unix timestamp
//	0x24  28 zero bytes
//	0x40  offset table: one uint32 per entry, relative to the record area
//	....  records: 16-byte header, code + NUL, word + NUL (UTF-16LE)
//
// The first offset slot (always 0) is emitted unconditionally, so an empty
// table still yields 0x44 bytes, exactly as before.
func gen(table []Entry) []byte {
	const tableStart = 0x40
	utf16NUL := []byte{0x00, 0x00}

	var buf bytes.Buffer
	stamp := uint32(time.Now().Unix())
	count := uint32(len(table))

	buf.WriteString("mschxudp\x02\x00`\x00\x01\x00\x00\x00")
	buf.Write(getUint32(tableStart))
	buf.Write(getUint32(tableStart + 4*count))
	buf.Write(getUint32(0)) // file-length placeholder, patched below
	buf.Write(getUint32(count))
	buf.Write(getUint32(stamp))
	buf.Write(make([]byte, 28))
	buf.Write(getUint32(0)) // offset of the first record

	words := make([][]byte, 0, len(table))
	codes := make([][]byte, 0, len(table))
	offset := uint32(0)
	for i, entry := range table {
		word := encodeUTF16LE(entry.Word)
		code := encodeUTF16LE(entry.Code)
		words = append(words, word)
		codes = append(codes, code)
		// Each record is 16 header bytes + code + 2 + word + 2. The running
		// sum after record i is the offset of record i+1, so the last record
		// contributes no further slot.
		if i != len(table)-1 {
			offset += uint32(len(word) + len(code) + 20)
			buf.Write(getUint32(offset))
		}
	}

	for i, entry := range table {
		buf.Write([]byte{0x10, 0x00, 0x10, 0x00})
		// Position of the word inside the record: header (16) + code + NUL (2).
		buf.Write(getUint16(uint16(len(codes[i]) + 18)))
		buf.WriteByte(byte(entry.Order))
		buf.Write([]byte{0x06, 0x00, 0x00, 0x00, 0x00})
		buf.Write(getUint32(stamp))
		// Terminators are written separately instead of append()ing to the
		// encoded slices, which could scribble on their backing arrays.
		buf.Write(codes[i])
		buf.Write(utf16NUL)
		buf.Write(words[i])
		buf.Write(utf16NUL)
	}

	data := buf.Bytes()
	binary.LittleEndian.PutUint32(data[0x18:], uint32(len(data)))
	return data
}

// loadInputFile reads phrase entries from the CSV file at path.
//
// Each data line must have exactly three comma-separated fields: word, code
// and an integer order. Fields are trimmed of surrounding whitespace. A
// leading UTF-8 BOM is removed, a first line containing "词组" is treated as
// a header and skipped, and blank lines are ignored. Lines that do not parse
// are logged and counted but do not abort the load.
//
// It returns the parsed entries, or a non-nil error if the file cannot be
// opened or a read error occurs part-way through.
func loadInputFile(path string) ([]Entry, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var (
		table   []Entry
		skipped int
	)
	scanner := bufio.NewScanner(file)
	first := true
	for scanner.Scan() {
		line := scanner.Text()
		if first {
			first = false
			// Spreadsheet exports often start with a BOM; left in place it
			// would become part of the first word.
			line = strings.TrimPrefix(line, "\ufeff")
			if strings.Contains(line, "词组") {
				continue
			}
		}
		if strings.TrimSpace(line) == "" {
			continue
		}

		parts := strings.Split(line, ",")
		if len(parts) != 3 {
			log.Printf("⚠️ 跳过格式错误行: %s", line)
			skipped++
			continue
		}
		order, err := strconv.Atoi(strings.TrimSpace(parts[2]))
		if err != nil {
			log.Printf("⚠️ 跳过无法解析行: %s", line)
			skipped++
			continue
		}
		table = append(table, Entry{
			Word:  strings.TrimSpace(parts[0]),
			Code:  strings.TrimSpace(parts[1]),
			Order: order,
		})
	}
	// Scan() returning false can mean EOF or a failure (I/O error, over-long
	// line); only Err() tells them apart.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("reading %q: %w", path, err)
	}

	fmt.Printf("\n📥 已读取词条:%d 条\n", len(table))
	if skipped > 0 {
		fmt.Printf("⚠️ 跳过无效行:%d 条\n", skipped)
	}
	return table, nil
}

// saveToDat serializes table with gen, writes the result to output (mode
// 0644) and prints a short summary with up to five sample entries.
// It returns the error from writing the file, if any.
func saveToDat(table []Entry, output string) error {
	data := gen(table)
	if err := os.WriteFile(output, data, 0644); err != nil {
		return err
	}

	fmt.Printf("\n✅ .dat 文件已生成:%s\n", output)
	fmt.Printf("📊 共转换词条数:%d 条\n", len(table))

	preview := 5
	if len(table) < preview {
		preview = len(table)
	}
	fmt.Println("🔍 示例词条预览:")
	for _, e := range table[:preview] {
		fmt.Printf(" - %s (%s) 序号: %d\n", e.Word, e.Code, e.Order)
	}
	return nil
}

// readLine reads one full line from r and returns it with surrounding
// whitespace (including the line ending) removed. A single pair of enclosing
// double quotes, as added by shells when a path is dragged or pasted, is
// stripped as well. Unlike fmt.Scanln it keeps embedded spaces.
//
// An error is returned only when nothing at all could be read, e.g. at EOF.
func readLine(r *bufio.Reader) (string, error) {
	line, err := r.ReadString('\n')
	line = strings.TrimSpace(line)
	if len(line) >= 2 && line[0] == '"' && line[len(line)-1] == '"' {
		line = strings.TrimSpace(line[1 : len(line)-1])
	}
	if err != nil && line == "" {
		return "", err
	}
	return line, nil
}

// main interactively asks for the input CSV and output .dat paths, converts
// the file, and re-prompts after any failure. It stops when a conversion
// succeeds or when standard input is exhausted.
func main() {
	log.SetFlags(0)
	log.SetPrefix("")
	fmt.Println("🛠️ 微软拼音词库转换工具")

	stdin := bufio.NewReader(os.Stdin)
	for {
		fmt.Print("📂 输入词库文件路径(默认 词库.csv): ")
		inputPath, err := readLine(stdin)
		if err != nil {
			// Without this, a closed stdin would re-prompt forever.
			fmt.Printf("\n❌ 读取输入失败:%v\n", err)
			return
		}
		if inputPath == "" {
			inputPath = "词库.csv"
		}

		defaultOutput := filepath.Join(filepath.Dir(inputPath), "微软自定义短语.dat")
		fmt.Printf("📁 输出.dat文件路径(默认 %s): ", defaultOutput)
		outputPath, err := readLine(stdin)
		if err != nil {
			fmt.Printf("\n❌ 读取输入失败:%v\n", err)
			return
		}
		if outputPath == "" {
			outputPath = defaultOutput
		}

		if _, err := os.Stat(inputPath); os.IsNotExist(err) {
			fmt.Printf("❌ 错误:文件不存在 → %s\n", inputPath)
			continue
		}

		table, err := loadInputFile(inputPath)
		if err != nil {
			fmt.Printf("❌ 读取失败:%v\n", err)
			continue
		}
		if len(table) == 0 {
			fmt.Println("⚠️ 未读取到有效词条,终止生成。")
			continue
		}

		if err := saveToDat(table, outputPath); err != nil {
			fmt.Printf("❌ 写入失败:%v\n", err)
			continue
		}
		break
	}

	fmt.Println("\n📌 按回车键退出程序...")
	// Only used to keep the console window open; the result is irrelevant.
	_, _ = stdin.ReadString('\n')
}