前段时间在微博看到讨论用Python实现tail命令,即输出文件的最后N行,我便试试用C、Python、Common Lisp各写个小程序,然后对比下性能。
我对Common Lisp还处于初学阶段,先从I/O、系统文件接口、Format、Package、ASDF入手的,对循环、宏等都不是很清楚,写的程序性能肯定不会太高;对于C和Python,也是一知半解,就权当玩玩吧。
预计: 对小型文件效率会高,大型文件效率直线下降,因为是读取所有内容再切片。
运行方式:
python py-tail-v1.py num FILE1 [FILE2..]
py-tail-v1.py:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""py-tail-v1: print the last N lines of each file (read everything, then slice).

Usage: python py-tail-v1.py num FILE1 [FILE2..]

Output goes through ``sys.stdout.write`` so the script runs unchanged on
both Python 2 and Python 3.
"""
import os
import sys


def tail(num=10, files=()):
    """Print the last *num* lines of every file in *files* to stdout.

    The whole file is read into memory and the result is sliced, so the
    cost grows with the file size rather than with *num*.

    Args:
        num: Number of trailing lines to show; must be an int >= 0.
            Zero prints no lines.
        files: Sequence of file paths. When more than one path is given,
            each file's output is preceded by a ``==> path <==`` header.

    Raises:
        SystemExit: with status -1 when *num* is invalid or a path does
            not exist (an error message is printed first).
    """
    if not isinstance(num, int) or num < 0:
        sys.stdout.write("**Error** the *num* should be Int number ( >= 0)\n")
        sys.exit(-1)

    show_header = len(files) > 1
    for filename in files:
        if not os.path.exists(filename):
            sys.stdout.write("**Error** Can not find file  %s\n" % filename)
            sys.exit(-1)
        if show_header:
            sys.stdout.write("==> " + filename + " <==\n")
        # The context manager closes the file even if reading fails.
        with open(filename, "r") as cin:
            lines = cin.readlines()
        # lines[-0:] is the *whole* list, so zero has to be special-cased.
        if num:
            sys.stdout.writelines(lines[-num:])


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stdout.write("Usage:  %s  num FILE1 [FILE2..] \n" % sys.argv[0])
        sys.exit(-1)
    try:
        line_count = int(sys.argv[1])
    except ValueError:
        # A non-numeric count is reported the same way as an out-of-range one.
        sys.stdout.write("**Error** the *num* should be Int number ( >= 0)\n")
        sys.exit(-1)
    tail(line_count, sys.argv[2:])
预计: 效率会高,是用C实现的封装,使用方面,应该总是使用这个。
运行方式:
python py-tail-v2.py num FILE1 [FILE2..]
py-tail-v2.py:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""py-tail-v2: print the last N lines of each file using a bounded deque.

Usage: python py-tail-v2.py num FILE1 [FILE2..]

Output goes through ``sys.stdout.write`` so the script runs unchanged on
both Python 2 and Python 3.
"""
import os
import sys
from collections import deque


def tail(num=10, files=()):
    """Print the last *num* lines of every file in *files* to stdout.

    The file is streamed line by line into a ``deque`` capped at *num*
    entries, so at most *num* lines are held in memory at once.

    Args:
        num: Number of trailing lines to show; must be an int >= 0.
            Zero prints no lines.
        files: Sequence of file paths. When more than one path is given,
            each file's output is preceded by a ``==> path <==`` header.

    Raises:
        SystemExit: with status -1 when *num* is invalid or a path does
            not exist (an error message is printed first).
    """
    if not isinstance(num, int) or num < 0:
        sys.stdout.write("**Error** the *num* should be Int number ( >= 0)\n")
        sys.exit(-1)

    show_header = len(files) > 1
    for filename in files:
        if not os.path.exists(filename):
            sys.stdout.write("**Error** Can not find file  %s\n" % filename)
            sys.exit(-1)
        if show_header:
            sys.stdout.write("==> " + filename + " <==\n")
        # The with-block closes the file; no explicit close() is needed.
        with open(filename, "r") as cin:
            # A deque with maxlen=num discards older lines as new ones arrive.
            lines = deque(cin, num)
        sys.stdout.writelines(lines)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stdout.write("Usage:  %s  num FILE1 [FILE2..] \n" % sys.argv[0])
        sys.exit(-1)
    try:
        line_count = int(sys.argv[1])
    except ValueError:
        # A non-numeric count is reported the same way as an out-of-range one.
        sys.stdout.write("**Error** the *num* should be Int number ( >= 0)\n")
        sys.exit(-1)
    tail(line_count, sys.argv[2:])
预计:小型文件,效率未必比Python版本一高;大型文件,效率应该在Python版本一和版本二之间。
运行方式:
./c-tail num FILE1 [FILE2..]
c-tail.c:
#include <stdio.h>
#include <stdlib.h>

/*
 * Locate the byte offset at which the last @num lines of @fp begin.
 *
 * The file is scanned backwards in fixed-size chunks, counting '\n'
 * bytes. A newline that terminates the final line is skipped first, so
 * files with and without a trailing newline both yield exactly @num lines.
 *
 * @fp  : stream opened in binary mode, so ftell/fseek offsets are plain
 *        byte counts
 * @num : number of trailing lines wanted
 *
 * @return: the start offset; 0 when the file holds @num lines or fewer;
 *          the file size when @num <= 0 (nothing to show);
 *          -1 when the stream cannot be positioned or read.
 */
static long tail_offset(FILE *fp, int num)
{
    char buf[4096];
    long size;
    long pos;
    int count = 0;

    if (fseek(fp, 0L, SEEK_END) != 0)
        return -1L;
    size = ftell(fp);
    if (size < 0)
        return -1L;
    if (size == 0)
        return 0L;
    if (num <= 0)
        return size;

    /* Ignore the newline that merely ends the last line. */
    pos = size;
    if (fseek(fp, size - 1, SEEK_SET) != 0)
        return -1L;
    if (fgetc(fp) == '\n')
        pos = size - 1;

    while (pos > 0) {
        long chunk = pos < (long)sizeof buf ? pos : (long)sizeof buf;
        long i;

        pos -= chunk;
        if (fseek(fp, pos, SEEK_SET) != 0 ||
            fread(buf, 1, (size_t)chunk, fp) != (size_t)chunk)
            return -1L;

        for (i = chunk - 1; i >= 0; i--) {
            /* The @num-th newline from the end precedes the wanted lines. */
            if (buf[i] == '\n' && ++count == num)
                return pos + i + 1;
        }
    }

    /* Fewer than @num lines in the file: show all of it. */
    return 0L;
}

/*
 * @num  : the last num line of file to display
 * @files: a file list
 * @len  : the length of @files list
 *
 * @return: void
 *
 * For every file, find where the last @num lines start and copy the
 * bytes from there to stdout. When more than one file is given, each
 * one is preceded by a "==> name <==" header. A file that cannot be
 * opened is reported and skipped.
 */
void tail(int num, char *files[], int len)
{
    char buf[4096];
    size_t got;
    int i;

    for (i = 0; i < len; i++) {
        long start;
        FILE *fp = fopen(files[i], "rb");

        if (NULL == fp) {
            printf("Can not open file %s\n", files[i]);
            continue;   /* never touch a NULL stream */
        }

        if (1 < len) {
            printf("==> %s <==\n", files[i]);
        }

        start = tail_offset(fp, num);
        if (start >= 0 && fseek(fp, start, SEEK_SET) == 0) {
            /* Block copy: no per-line buffer limit. */
            while ((got = fread(buf, 1, sizeof buf, fp)) > 0) {
                fwrite(buf, 1, got, stdout);
            }
        } else {
            printf("Can not read file %s\n", files[i]);
        }

        fclose(fp);
    }
}

int main(int argc, char **argv)
{
    if (3 > argc) {
        printf("Usage: %s num FILE1 [FILE2..]\n", argv[0]);
        return -1;
    }

    /* argv[2..argc-1] already is the file list; no copy is required. */
    tail(atoi(argv[1]), argv + 2, argc - 2);

    return 0;
}
预计: 由于对CL不熟悉,思路和C一样,效率就不要考虑了。
使用SBCL运行,并获取传入SBCL的命令行参数:
sbcl --script cl-tail.lisp num FILE1 [FILE2..]
cl-tail.lisp:
;; 2013-09-27
;; Leslie Zhu
;; pythonisland@gmail.com
;;
;; Print the last NUM lines of each file named on the command line.
;;

(in-package :cl-user)

(defpackage :cz.leslie.tail
  (:nicknames :cz-tail)
  (:use :cl :cl-user)
  (:export :display :tail-stream :tail-file :tail))

(in-package :cz-tail)

(defun display (stream pos)
  "Move STREAM to file position POS and print every remaining line to
standard output, each followed by a newline."
  (file-position stream pos)
  (loop for line = (read-line stream nil)
        while line
        do (format t "~a~%" line)))

(defun tail-start (stream num)
  "Return the file position at which the last NUM lines of STREAM begin.
Scans backwards one position at a time counting newlines. A newline at
the very end of the file is not counted, so files with and without a
trailing newline both give NUM lines. Returns 0 when the file is empty or
holds NUM lines or fewer."
  ;; NOTE(review): assumes FILE-LENGTH and FILE-POSITION use the same unit
  ;; and that every position is a character boundary (true for single-byte
  ;; text) -- confirm before using on multi-byte encodings.
  (let ((end-pos (1- (file-length stream)))
        (seen 0))
    ;; For an empty file END-POS is -1 and the loop body never runs.
    (loop for pos from end-pos downto 0
          do (file-position stream pos)
             (when (and (eql (read-char stream nil) #\Newline)
                        (< pos end-pos)
                        (= (incf seen) num))
               (return (1+ pos)))
          finally (return 0))))

(defun tail-stream (stream num)
  "display the last *num* line from a *stream*; prints nothing unless
NUM is positive."
  (when (> num 0)
    (display stream (tail-start stream num))))

(defun tail-file (num filename)
  "Open FILENAME for reading and print its last NUM lines. A file that
cannot be found is reported on *ERROR-OUTPUT* and skipped."
  (with-open-file (stream filename :direction :input :if-does-not-exist nil)
    ;; :if-does-not-exist nil makes STREAM NIL for a missing file.
    (if stream
        (tail-stream stream num)
        (format *error-output* "Can not open file ~a~%" filename))))

(defun tail (num files)
  "open every file in files, tail the last *num* line. When several
files are given, each one is preceded by a ==> name <== header."
  (if (> num 0)
      (dolist (file files)
        (if (> (length files) 1)
            (format t "==> ~a <==~%" file))
        (tail-file num file))))

;; test
;; Command-line handling is kept exactly as in the original script.
(defvar *num* (car (cdr sb-ext:*posix-argv*)))
(defvar *files* (cdr (cdr sb-ext:*posix-argv*)))

(tail (parse-integer *num*) *files*)
说明:
对于小型文件,Python版本一比版本二更快;对于大型文件,版本二性能更好;两个版本应对一般的小文件性能足够,大文件就算了吧。C语言版本的速度是神速,但和GNU/Linux的tail命令相比还是差一点;在大型文件时,性能依旧比较高。Common Lisp的速度竟然比Python快,出乎意料。测试不是大量测试然后取平均值,属于个别测试,有很大的随机性,不足为凭。
说明:
对于大型文件,Python版本的效率明显降低;C程序效率依旧很高;SBCL效率不错,且第一次启动会慢,后面再次启动就会快,应该有缓存之类的机制;测试结果依旧有一定的随机性。
虽然测试的方法值得商榷,但基本的测试做完后,对Common Lisp的信心满满的,速度没有传说的那么低嘛!
源码: https://github.com/LeslieZhu/epos.git
作者简介:
朱春来(Leslie Zhu),金融工程师,毕业于西安电子科技大学, 喜欢历史,喜欢编程. 日常在GNU/Linux环境下进行C/C++、Python开发,对Common Lisp、Node.js、金融等感兴趣。可以通过邮箱(pythonisland@gmail.com)联系他,或者直接在他的个人主页上留言.
访问朱春来(Leslie Zhu)的个人主页(http://lesliezhu.github.com)