使用Servlet获取用户日志

前段时间,实验室需要开发一个用户日志模块,用来对实验室的Web项目进行监控,获取用户的行为日志。个人首先觉得应该主要使用js来实现相关功能,无奈js水平着实太低,最终采用了servlet的方式来实现。

自己先从github上查询到了一个相关项目,clickstream,我先来介绍一下该项目是怎么实现的。

Clickstream的实现

它首先使用了一个Listener来监听ServletContext和HttpSession,代码如下

public class ClickstreamListener implements ServletContextListener, HttpSessionListener {private static final Log log = LogFactory.getLog(ClickstreamListener.class);/** The servlet context attribute key. */public static final String CLICKSTREAMS_ATTRIBUTE_KEY = "clickstreams";/** * The click stream (individual) attribute key: this is * the one inserted into the HttpSession. */public static final String SESSION_ATTRIBUTE_KEY = "clickstream";/** The current clickstreams, keyed by session ID. */private Map<String, Clickstream> clickstreams = new ConcurrentHashMap<String, Clickstream>();public ClickstreamListener() {log.debug("ClickstreamLogger constructed");}/** * Notification that the ServletContext has been initialized. * * @param sce The context event */public void contextInitialized(ServletContextEvent sce) {log.debug("ServletContext initialised");sce.getServletContext().setAttribute(CLICKSTREAMS_ATTRIBUTE_KEY, clickstreams);}/** * Notification that the ServletContext has been destroyed. * * @param sce The context event */public void contextDestroyed(ServletContextEvent sce) {log.debug("ServletContext destroyed");// help gc, but should be already clear except when exception was thrown during sessionDestroyedclickstreams.clear();}/** * Notification that a Session has been created. * * @param hse The session event */public void sessionCreated(HttpSessionEvent hse) {final HttpSession session = hse.getSession();if (log.isDebugEnabled()) {log.debug("Session " + session.getId() + " was created, adding a new clickstream.");}Object attrValue = session.getAttribute(SESSION_ATTRIBUTE_KEY);if (attrValue != null) {log.warn("Session " + session.getId() + " already has an attribute named " +SESSION_ATTRIBUTE_KEY + ": " + attrValue);}final Clickstream clickstream = new Clickstream();session.setAttribute(SESSION_ATTRIBUTE_KEY, clickstream);clickstreams.put(session.getId(), clickstream);}/** * Notification that a session has been destroyed. 
* * @param hse The session event */public void sessionDestroyed(HttpSessionEvent hse) {final HttpSession session = hse.getSession();// check if the session is not null (expired)if (session == null) {return;}if (log.isDebugEnabled()) {log.debug("Session " + session.getId() + " was destroyed, logging the clickstream and removing it.");}final Clickstream stream = clickstreams.get(session.getId());if (stream == null) {log.warn("Session " + session.getId() + " doesn't have a clickstream.");return;}try {if (stream.getSession() != null) {ClickstreamLoggerFactory.getLogger().log(stream);}}catch (Exception e) {log.error(e.getMessage(), e);}finally {clickstreams.remove(session.getId());}}}在这里,读者应该明白session和request之间的区别,一次session可以对应多个request,而多个request可以封装成一个Clickstream。所以使用了private Map<String, Clickstream> clickstreams = new ConcurrentHashMap<String, Clickstream>();

来存储session和Clickstream之间的映射。

每次创建一个session的时候,就在session里面绑定一个Clickstream。

Clickstream的定义如下:

public class Clickstream implements Serializable {private static final long serialVersionUID = 1;/** The stream itself: a list of click events. */private List<ClickstreamRequest> clickstream = new CopyOnWriteArrayList<ClickstreamRequest>();/** The attributes. */private Map<String, Object> attributes = new HashMap<String, Object>();/** The host name. */private String hostname;/** The original referer URL, if any. */private String initialReferrer;/** The stream start time. */private Date start = new Date();/** The time of the last request made on this stream. */private Date lastRequest = new Date();/** Flag indicating this is a bot surfing the site. */private boolean bot = false;/** * The session itself. * * Marked as transient so that it does not get serialized when the stream is serialized. * See JIRA issue CLK-14 for details. */private transient HttpSession session;/** * Adds a new request to the stream of clicks. The HttpServletRequest is converted * to a ClickstreamRequest object and added to the clickstream. * * @param request The serlvet request to be added to the clickstream */public void addRequest(HttpServletRequest request) {lastRequest = new Date();if (hostname == null) {hostname = request.getRemoteHost();session = request.getSession();}// if this is the first request in the click streamif (clickstream.isEmpty()) {// setup initial referrerif (request.getHeader("REFERER") != null) {initialReferrer = request.getHeader("REFERER");}else {initialReferrer = "";}// decide whether this is a botbot = BotChecker.isBot(request);}clickstream.add(new ClickstreamRequest(request, lastRequest));}/** * Gets an attribute for this clickstream. * * @param name */public Object getAttribute(String name) {return attributes.get(name);}/** * Gets the attribute names for this clickstream. */public Set<String> getAttributeNames() {return attributes.keySet();}/** * Sets an attribute for this clickstream. 
* * @param name * @param value */public void setAttribute(String name, Object value) {attributes.put(name, value);}/** * Returns the host name that this clickstream relates to. * * @return the host name that the user clicked through */public String getHostname() {return hostname;}/** * Returns the bot status. * * @return true if the client is bot or spider */public boolean isBot() {return bot;}/** * Returns the HttpSession associated with this clickstream. * * @return the HttpSession associated with this clickstream */public HttpSession getSession() {return session;}/** * The URL of the initial referer. This is useful for determining * how the user entered the site. * * @return the URL of the initial referer */public String getInitialReferrer() {return initialReferrer;}/** * Returns the Date when the clickstream began. * * @return the Date when the clickstream began */public Date getStart() {return start;}/** * Returns the last Date that the clickstream was modified. * * @return the last Date that the clickstream was modified */public Date getLastRequest() {return lastRequest;}/** * Returns the actual List of ClickstreamRequest objects. 
* * @return the actual List of ClickstreamRequest objects */public List<ClickstreamRequest> getStream() {return clickstream;}ClickstreamRequest是对HttpServletRequest的简化封装,定义如下:public class ClickstreamRequest implements Serializable {private static final long serialVersionUID = 1;private final String protocol;private final String serverName;private final int serverPort;private final String requestURI;private final String queryString;private final String remoteUser;private final long timestamp;public ClickstreamRequest(HttpServletRequest request, Date timestamp) {protocol = request.getProtocol();serverName = request.getServerName();serverPort = request.getServerPort();requestURI = request.getRequestURI();queryString = request.getQueryString();remoteUser = request.getRemoteUser();this.timestamp = timestamp.getTime();}public String getProtocol() {return protocol;}public String getServerName() {return serverName;}public int getServerPort() {return serverPort;}public String getRequestURI() {return requestURI;}public String getQueryString() {return queryString;}public String getRemoteUser() {return remoteUser;}public Date getTimestamp() {return new Date(timestamp);}/** * Returns a string representation of the HTTP request being tracked. * Example: <b>?arg1=foo&arg2=bar</b> * * @return a string representation of the HTTP request being tracked. */@Overridepublic String toString() {return serverName + (serverPort != 80 ? ":" + serverPort : "") + requestURI+ (queryString != null ? "?" 
+ queryString : "");}}所以,当每次有请求时,使用Filter对request进行过滤,完善Clickstream的内容public void doFilter(ServletRequest req, ServletResponse res, FilterChain chain) throws IOException, ServletException {// Ensure that filter is only applied once per request.if (req.getAttribute(FILTER_APPLIED) == null) {log.debug("Applying clickstream filter to request.");req.setAttribute(FILTER_APPLIED, true);HttpServletRequest request = (HttpServletRequest)req;HttpSession session = request.getSession();Clickstream stream = (Clickstream) session.getAttribute(ClickstreamListener.SESSION_ATTRIBUTE_KEY);stream.addRequest(request);}else {log.debug("Clickstream filter already applied, ignoring it.");}// pass the request onchain.doFilter(req, res);}当session销毁的时候,把Clickstream持久化即可。

改进

1. Clickstream项目,使用ServletContext来存储Map,意味着只能使用一个web容器,

不然无法保证ClickstreamRequest的顺序性,不利于拓展。所以在集群情况下,

比如tomcat集群,可以使用Redis来存储相关的对象。

把Clickstream拆成三部分:

Redis中的List, 每个元素对应着一个序列化之后的ClickstreamRequest 字符串;

Redis中的Hash,存储private Map<String, Object> attributes = new HashMap<String, Object>();

想做你的有缘人,可是我知道结果是惨淡的,但还是心存希望!

使用Servlet获取用户日志

相关文章:

你感兴趣的文章:

标签云: