python http parser

https://github.com/benoitc/http-parser/

如果可能,会调用 C 语言版本以加快分析速度。

http-parser 提供:

- parser.HttpParser:底层的分析器
- http.HttpStream:提供高层的分析

另外提供 3 种类型的 reader:

- IterReader:读取迭代器
- StringReader:读取字符串
- SocketReader:读取 sockets

使用HttpStream高层分析器

#!/usr/bin/env python
"""Fetch a page and parse the response with the high-level HttpStream API."""
import socket

from http_parser.http import HttpStream
from http_parser.reader import SocketReader


def main():
    """Request http://gunicorn.org/ and print the parsed headers and body.

    The socket is always closed, even when connecting or parsing fails.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(('gunicorn.org', 80))
        # Bytes literal: sockets only accept bytes on Python 3 (and this is
        # still a plain str on Python 2). sendall() keeps sending until the
        # whole request has been written, unlike a single send() call.
        s.sendall(b"GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")
        # SocketReader feeds the socket's data to the high-level parser.
        r = SocketReader(s)
        p = HttpStream(r)
        # print(...) with a single argument works on both Python 2 and 3.
        print(p.headers())
        print(p.body_file().read())
    finally:
        s.close()


if __name__ == "__main__":
    main()

使用 HttpParser 底层分析器

#!/usr/bin/env python
"""Fetch a page and parse the response with the low-level HttpParser API."""
import socket

# Prefer the C parser; fall back to the pure-Python parser when the
# C extension is not available.
try:
    from http_parser.parser import HttpParser
except ImportError:
    from http_parser.pyparser import HttpParser


def main():
    """Request http://gunicorn.org/ and print the parsed headers and body.

    Data is read from the socket in 1024-byte chunks and pushed into the
    parser until it reports that the message is complete or the peer closes
    the connection. The socket is always closed, even on failure.
    """
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    headers_shown = False
    try:
        s.connect(('gunicorn.org', 80))
        # Bytes literal: sockets only accept bytes on Python 3 (and this is
        # still a plain str on Python 2). sendall() keeps sending until the
        # whole request has been written, unlike a single send() call.
        s.sendall(b"GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")

        while True:
            data = s.recv(1024)
            if not data:
                # Peer closed the connection.
                break

            recved = len(data)
            # The example expects execute() to consume the whole chunk.
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            # Print the headers once, not again for every later chunk.
            if p.is_headers_complete() and not headers_shown:
                print(p.get_headers())
                headers_shown = True

            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        # Body chunks are bytes, so join with a bytes separator.
        print(b"".join(body))
    finally:
        s.close()


if __name__ == "__main__":
    main()

http://www.foss-sourcebook.org/parsing-http-headers-in-python/

可以看到,python 标准库中的urllib2 获取header,得到的cookie也是字符串

# Fetch the page with urllib2 (Python 2 standard library).
request = urllib2.Request('http://zhiwei.li/')
response = urllib2.urlopen(request)
# The Set-Cookie header comes back as a plain string, not a parsed object.
cookies = response.headers["Set-cookie"]

关于cookie的处理,请参考

http://zhiwei.li/text/2013/09/python%E4%BD%9C%E4%B8%BA%E5%AE%A2%E6%88%B7%E7%AB%AF%E5%A6%82%E4%BD%95%E6%96%B9%E4%BE%BF%E5%9C%B0%E5%A4%84%E7%90%86cookie/

python http parser

相关文章:

你感兴趣的文章:

标签云: