听说Google出品的Go语言也是系统开发中常用的,性能到底怎么样?还是老办法,让DeepSeek写个程序跑一下。基于以往的经验,直接让它同时编写有缓冲和无缓冲两个版本。
package main

import (
	"bufio"
	"fmt"
	"os"
	"sort"
	"strconv"
	"time"
)

// Unbuffered version.
// unbufferedSort reads all lines of filename, sorts them in ascending byte
// order and prints them with fmt.Println, i.e. one write to os.Stdout per
// line. It is the deliberately unbuffered baseline of the comparison.
//
// It returns the first error hit while reading the file or writing a line.
func unbufferedSort(filename string) error {
	lines, err := readLines(filename)
	if err != nil {
		return err
	}

	sort.Strings(lines)

	for _, line := range lines {
		// No buffering on purpose: every line goes straight to os.Stdout.
		if _, err := fmt.Println(line); err != nil {
			return err
		}
	}
	return nil
}

// bufferedSort reads all lines of filename, sorts them in ascending byte
// order and writes them to os.Stdout through a bufio.Writer of bufferSize
// bytes (bufio falls back to its default size when bufferSize <= 0).
//
// It returns the first read or write error, including an error from the
// final Flush, so a failed last write is never reported as success.
func bufferedSort(filename string, bufferSize int) error {
	lines, err := readLines(filename)
	if err != nil {
		return err
	}

	sort.Strings(lines)

	writer := bufio.NewWriterSize(os.Stdout, bufferSize)
	for _, line := range lines {
		// Write the line and its terminator separately to avoid allocating
		// a concatenated string for every line.
		if _, err := writer.WriteString(line); err != nil {
			return err
		}
		if err := writer.WriteByte('\n'); err != nil {
			return err
		}
	}
	// Flush explicitly (not via defer) so its error reaches the caller.
	return writer.Flush()
}

// readLines returns every line of filename with the line terminator removed.
//
// Lines of up to maxLineSize bytes are accepted; the default bufio.Scanner
// limit of 64 KiB would otherwise reject long lines with bufio.ErrTooLong.
// On any error the returned slice is nil.
func readLines(filename string) ([]string, error) {
	const (
		initialBufSize = 64 * 1024
		maxLineSize    = 64 * 1024 * 1024
	)

	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialBufSize), maxLineSize)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return lines, nil
}

// main parses the command line
//
//	sort_lines <filename> <mode> [bufferSize]
//
// runs the selected mode ("unbuffered" or "buffered") and reports the elapsed
// time on stderr. Usage and error messages also go to stderr so that they
// never end up in redirected output. The process exits with status 1 on any
// usage or processing error.
func main() {
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "Usage:")
		fmt.Fprintln(os.Stderr, " sort_lines <filename> <mode> [bufferSize]")
		fmt.Fprintln(os.Stderr, "Modes: unbuffered, buffered")
		os.Exit(1)
	}

	filename := os.Args[1]
	mode := os.Args[2]

	start := time.Now()

	var err error
	switch mode {
	case "unbuffered":
		err = unbufferedSort(filename)
	case "buffered":
		bufferSize := 4 * 1024 * 1024 // 4 MB by default
		if len(os.Args) > 3 {
			// strconv.Atoi rejects trailing garbage such as "64k".
			n, convErr := strconv.Atoi(os.Args[3])
			if convErr != nil {
				fmt.Fprintf(os.Stderr, "Invalid buffer size: %v\n", convErr)
				os.Exit(1)
			}
			if n <= 0 {
				fmt.Fprintf(os.Stderr, "Invalid buffer size: %d (must be positive)\n", n)
				os.Exit(1)
			}
			bufferSize = n
		}
		err = bufferedSort(filename, bufferSize)
	default:
		fmt.Fprintln(os.Stderr, "Invalid mode")
		os.Exit(1)
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}

	elapsed := time.Since(start)
	fmt.Fprintf(os.Stderr, "Processing time: %s\n", elapsed)
}
编译执行
go/bin/go build -o gosort sort_lines.go
time ./gosort varchar.txt buffered > /dev/null
Processing time: 3.497140997s
real 0m3.554s
user 0m0.621s
sys 0m0.366s
time ./gosort varchar.txt unbuffered > /dev/null
Processing time: 3.720407838s
real 0m4.603s
user 0m0.755s
sys 0m0.474s
time ./gosort varchar.txt buffered > vcc.txt
Processing time: 3.798995799s
real 0m3.855s
user 0m0.681s
sys 0m0.301s
time ./gosort varchar.txt buffered 65536 > vcc.txt
Processing time: 3.891683917s
real 0m3.959s
user 0m0.627s
sys 0m0.380s
time ./gosort varchar.txt unbuffered > vcc.txt
^C
real 1m26.182s
user 0m3.305s
sys 0m7.983s
nm -D gosort >go.h
nm: gosort: no symbols
nm gosort >go.h
如上所示,带缓冲的版本性能尚可,缓冲区大小(默认4MB与65536字节)影响不大。无缓冲版本输出到/dev/null时只是稍慢,但重定向到文件时运行了1分26秒仍未结束(被Ctrl+C中断),只能用离奇来形容。而且Go语言默认编译就开启优化,也没什么可以调优的。
与Zig语言一样,编译出的是静态版本,看不出调用了哪些系统库函数。
后记
张泽鹏先生提醒我是否在WSL上测试,根据以往的经验WSL的读写性能比较糟糕
我改用windows版本编译运行,比较正常
C:\d>gosort varchar.txt buffered > vcc.txt
Processing time: 600.3093ms
C:\d>gosort varchar.txt unbuffered > vcc.txt
Processing time: 2.4327929s
张先生也写了一版,效率比DeepSeek的版本还高一些。他采用了64KB的读取缓冲区和ReadString逐行读取,而DeepSeek使用的是bufio.NewScanner。
package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"slices"
)

// main sorts the lines of the file named by the single command-line argument
// and writes them to standard output. Usage and error messages go to stderr;
// the process exits with status 1 on any failure.
func main() {
	if len(os.Args) != 2 {
		fmt.Fprintf(os.Stderr, "Usage: %s <filename>\n", os.Args[0])
		os.Exit(1)
	}
	// All work happens in run so that its deferred cleanup executes before
	// os.Exit, which would otherwise skip deferred calls.
	if err := run(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run reads filename line by line through a 64 KiB buffered reader, sorts the
// lines (each kept with its trailing newline) and writes them to os.Stdout
// through a 64 KiB buffered writer.
//
// A final line without a newline gets one appended so that every output line
// is terminated. Errors from opening, reading, writing and the final flush
// are wrapped with context and returned.
func run(filename string) error {
	file, err := os.Open(filename)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}
	defer file.Close()

	lines := make([]string, 0, 1024)

	// Read.
	reader := bufio.NewReaderSize(file, 64*1024)
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return fmt.Errorf("failed to read line: %w", err)
		}
		if err == io.EOF {
			if len(line) > 0 {
				// The last line has content but no newline: keep the
				// content and add the missing terminator.
				lines = append(lines, line+"\n")
			}
			break
		}
		lines = append(lines, line)
	}

	// Sort.
	slices.Sort(lines)

	// Write.
	writer := bufio.NewWriterSize(os.Stdout, 64*1024)
	for _, line := range lines {
		if _, err := writer.WriteString(line); err != nil {
			return fmt.Errorf("failed to write line: %w", err)
		}
	}
	// Flush explicitly so that a failure of the last write is reported.
	if err := writer.Flush(); err != nil {
		return fmt.Errorf("failed to flush output: %w", err)
	}
	return nil
}
在Kylinx上运行的结果如下
:/shujv/par$ time ./zhanggosort varchar.txt > /dev/null
real 0m0.875s
user 0m0.792s
sys 0m0.156s
:/shujv/par$ time ./zhanggosort varchar.txt > vcc.txt
real 0m0.874s
user 0m0.768s
sys 0m0.052s
:/shujv/par$ time ./gosort varchar.txt buffered > vcc.txt
Processing time: 1.14983689s
real 0m1.187s
user 0m1.132s
sys 0m0.264s
go的一个好处是不挑glibc版本,kylinx直接跑