Hbase葱岭探秘–过滤器Api

HBase中提供了许多过滤器接口，用来对数据进行过滤，从而查询出想要的数据。

行过滤器

针对行键信息进行过滤，比较器可以采用二进制比较、按位与/或/异或、正则匹配以及子串匹配等方式。同时可以通过EQUAL、NOT_EQUAL等比较选项来控制筛选数据的条件。

/**     * 行过滤器 BinaryComparator         NullComparator:是不是空值     * BitComparator:通过BitwiseOp类提供的按位与、或、异或操作进行位级别比较 RegexStringComparator:正则匹配     * SubStringComparator:子串是不是包含进行匹配     */    private static void testRowFilter() {        try {            HTable table = new HTable(config, "testtable");            Scan scan = new Scan();            scan.addColumn("col1".getBytes(), "name".getBytes());            // 行过滤器            Filter filter = new RowFilter(CompareOp.EQUAL,                    new BinaryComparator("row2".getBytes()));            scan.setFilter(filter);            ResultScanner result = table.getScanner(scan);            for (Result res : result) {                log.info("行过滤器>" + res);            }            // 正则的行过滤器            Filter filter2 = new RowFilter(CompareOp.EQUAL,                    new RegexStringComparator(".*.2"));            scan.setFilter(filter2);            ResultScanner resultRegx = table.getScanner(scan);            for (Result res : resultRegx) {                log.info("正则>" + res);            }            Filter filterSubString = new RowFilter(CompareOp.EQUAL,                    new SubstringComparator("w2"));            scan.setFilter(filterSubString);            ResultScanner resultSubString = table.getScanner(scan);            for (Result res : resultSubString) {                log.info("子串>" + res);            }            table.close();        } catch (IOException e) {            log.error(e);        }    }

列族过滤器

根据列族进行筛选，形式和上面的行过滤器类似，通过设置参数中的比较选项和比较器来指定筛选条件。

/**
 * Demonstrates column-family filtering on "testtable".
 *
 * Two scans are built with the Scan(byte[], Filter) constructor, passing
 * "row2" as the first argument: one keeps families equal to "col1"; the other
 * keeps families matching the regex ".*.1" while the scan itself is limited to
 * family "col1". Each result is written to the log.
 *
 * Scanners are closed after use and the table is closed in a finally block so
 * that nothing leaks when an IOException interrupts the method.
 */
private static void testFamlyFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");

        Filter filter = new FamilyFilter(CompareOp.EQUAL,
                new BinaryComparator("col1".getBytes()));
        Scan scan = new Scan("row2".getBytes(), filter);
        ResultScanner result = table.getScanner(scan);
        try {
            for (Result res : result) {
                log.info(res);
            }
        } finally {
            result.close();
        }

        Filter filterNull = new FamilyFilter(CompareOp.EQUAL,
                new RegexStringComparator(".*.1"));
        Scan scanNull = new Scan("row2".getBytes(), filterNull);
        scanNull.addFamily("col1".getBytes());
        ResultScanner resultNull = table.getScanner(scanNull);
        // Defensive null check kept from the original code.
        if (resultNull != null) {
            try {
                for (Result res : resultNull) {
                    log.info(res);
                }
            } finally {
                resultNull.close();
            }
        } else {
            log.info("null");
        }
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

列名过滤器

和上面几个过滤器类似，这里是根据列进行筛选，设置相应的条件后就可以进行相应的筛选了。

/**
 * Demonstrates column-qualifier filtering on "testtable".
 *
 * A QualifierFilter keeping qualifiers equal to "name" is applied first to a
 * scan built with Scan("row2", filter) and then to a Get of row "row2"; both
 * outcomes are written to the log.
 *
 * The scanner is closed after iteration and the table is closed in a finally
 * block. Failures are logged at error level, matching the other filter demos.
 */
public static void testColumFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");
        Filter filter = new QualifierFilter(CompareOp.EQUAL,
                new BinaryComparator("name".getBytes()));

        Scan scan = new Scan("row2".getBytes(), filter);
        ResultScanner result = table.getScanner(scan);
        try {
            for (Result res : result) {
                log.info(res);
            }
        } finally {
            result.close();
        }

        Get get = new Get("row2".getBytes());
        get.setFilter(filter);
        Result resultGet = table.get(get);
        log.info(resultGet);
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

参考列过滤器

参考列过滤器根据列族和列限定符进行筛选，返回与参考列相同时间戳的行的所有键值对。

/**
 * Demonstrates the dependent-column filter on "testtable".
 *
 * A DependentColumnFilter is created for column col1:name with its third
 * constructor argument set to false, then applied to a full-table scan and to
 * a Get of row "row2"; both outcomes are written to the log.
 *
 * The scanner is closed after iteration and the table is closed in a finally
 * block so that nothing leaks when an IOException interrupts the method.
 */
public static void testDependentColumnFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");
        Filter filter = new DependentColumnFilter("col1".getBytes(),
                "name".getBytes(), false);

        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner resu = table.getScanner(scan);
        try {
            for (Result result : resu) {
                log.info(result);
            }
        } finally {
            resu.close();
        }

        Get get = new Get("row2".getBytes());
        get.setFilter(filter);
        Result result = table.get(get);
        log.info(result);
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

单列过滤器

通过一列的值进行判断是不是需要进行过滤。

/**
 * Demonstrates single-column value filtering on "testtable".
 *
 * A SingleColumnValueFilter comparing column col1:name for equality with "wy"
 * is applied to a full-table scan and to a Get of row "row2"; both outcomes
 * are written to the log.
 *
 * The scanner is closed after iteration and the table is closed in a finally
 * block. Failures are logged at error level, matching the other filter demos.
 */
public static void testSingleColumnValueFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");
        Filter filter = new SingleColumnValueFilter("col1".getBytes(),
                "name".getBytes(), CompareOp.EQUAL, "wy".getBytes());

        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner result = table.getScanner(scan);
        try {
            for (Result res : result) {
                log.info(res);
            }
        } finally {
            result.close();
        }

        Get get = new Get("row2".getBytes());
        get.setFilter(filter);
        Result resultGet = table.get(get);
        log.info(resultGet);
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

前缀过滤器

根据行键的前缀进行匹配，本例中查询的是行键以row为前缀的行的数据。

/**
 * Demonstrates row-key prefix filtering on "testtable".
 *
 * A PrefixFilter for the prefix "row" is applied to a full-table scan and to
 * a Get of row "row2"; both outcomes are written to the log.
 *
 * The original Get was issued without the filter, so its output did not
 * exercise the PrefixFilter at all; the filter is now set on the Get, as the
 * sibling filter demos do. The scanner is closed after iteration and the table
 * is closed in a finally block. Failures are logged at error level.
 */
public static void testPrefixFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");
        Filter filter = new PrefixFilter("row".getBytes());

        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner result = table.getScanner(scan);
        try {
            for (Result res : result) {
                log.info("res>" + res);
            }
        } finally {
            result.close();
        }

        Get get = new Get("row2".getBytes());
        // Apply the same filter so the Get actually demonstrates it.
        get.setFilter(filter);
        Result resultGet = table.get(get);
        log.info("get>" + resultGet);
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

分页过滤器

通过PageFilter设置一页中数据的条数。注意，在重新设置起始行的时候，要在上一页最后一行的行键后面追加一个后缀（代码中的POSTFIX），使新的起始行与上一页的最后一行不同；否则每次扫描都会再次读到同一行，造成死循环无法停止。

/**     * 分页过滤器     */    public static void testPageFilter() {        try {            HTable table = new HTable(config, "testtable");            Filter filter = new PageFilter(10);            int totalRows = 0;            byte[] lastRow = null;            Scan scan = new Scan();            while (true) {                scan.setFilter(filter);                if (lastRow != null) {                    // 加上0后表示新的开始防止row的内容一样造成死循环                    byte[] startRow = Bytes.add(lastRow, POSTFIX);                    scan.setStartRow(startRow);                }                ResultScanner resultScan = table.getScanner(scan);                int localRows = 0;                Result result = resultScan.next();                while (result != null) {                    log.info(result);                    localRows++;                    totalRows++;                    lastRow = result.getRow();                    result = resultScan.next();                }                if (localRows == 0)                    break;            }            log.info(totalRows);            table.close();        } catch (IOException e) {            log.info(e);        }    }

/**
 * Demonstrates column pagination on "testtable".
 *
 * A ColumnPaginationFilter constructed with the arguments (5, 10) is applied
 * to a full-table scan and every result is written to the log.
 * NOTE(review): per the HBase API these arguments are presumably (limit,
 * offset) — confirm against the client version in use.
 *
 * The scanner is closed after iteration and the table is closed in a finally
 * block. Failures are logged at error level, matching the other filter demos.
 */
public static void testColumnPaginationFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");
        Filter filter = new ColumnPaginationFilter(5, 10);
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner result = table.getScanner(scan);
        try {
            for (Result res : result) {
                log.info(res);
            }
        } finally {
            result.close();
        }
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

Skip过滤器

与ValueFilter结合使用：如果一行中有某一列的值不符合要求，则整行数据都会被直接过滤掉。

/**
 * Demonstrates SkipFilter on "testtable".
 *
 * First a plain ValueFilter (value NOT_EQUAL "v") is applied to a full-table
 * scan and the results are logged with a "<" prefix. Then the same filter is
 * wrapped in a SkipFilter, a second scan is run, and the results are logged
 * with a ">" prefix, so the two outputs can be compared.
 *
 * Both scanners are closed after iteration and the table is closed in a
 * finally block. Failures are logged at error level, matching the other
 * filter demos.
 */
public static void testSkipFilter() {
    HTable table = null;
    try {
        table = new HTable(config, "testtable");
        Filter filt = new ValueFilter(CompareOp.NOT_EQUAL,
                new BinaryComparator("v".getBytes()));

        // Pass 1: the value filter on its own.
        Scan scanValue = new Scan();
        scanValue.setFilter(filt);
        ResultScanner ress = table.getScanner(scanValue);
        try {
            for (Result result : ress) {
                log.info("<" + result);
            }
        } finally {
            ress.close();
        }

        // Pass 2: the same filter wrapped in a SkipFilter.
        Filter filter = new SkipFilter(filt);
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner result = table.getScanner(scan);
        try {
            for (Result res : result) {
                log.info(">" + res);
            }
        } finally {
            result.close();
        }
    } catch (IOException e) {
        log.error(e);
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
}

全匹配过滤器

将被包装的过滤器第一次匹配失败之前的数据全部查询出来；一旦遇到不满足该过滤器条件的数据，整个查询立即结束。

    /**
     * Demonstrates WhileMatchFilter on "testtable".
     *
     * First a plain RowFilter (row key NOT_EQUAL "row6") is applied to a
     * full-table scan and the results are logged with a ">" prefix. Then the
     * same filter is wrapped in a WhileMatchFilter, the scan is rerun, and the
     * results are logged with a "<" prefix, so the two outputs can be compared.
     *
     * Both scanners are closed after iteration and the table is closed in a
     * finally block. Failures are logged at error level, matching the other
     * filter demos.
     */
    public static void testWhileMatch() {
        HTable table = null;
        try {
            table = new HTable(config, "testtable");
            Filter filt = new RowFilter(CompareOp.NOT_EQUAL,
                    new BinaryComparator("row6".getBytes()));

            // Pass 1: the row filter on its own.
            Scan scan = new Scan();
            scan.setFilter(filt);
            ResultScanner results = table.getScanner(scan);
            try {
                for (Result res : results) {
                    log.info(">" + res);
                }
            } finally {
                results.close();
            }

            // Pass 2: the same filter wrapped in a WhileMatchFilter.
            Filter filter = new WhileMatchFilter(filt);
            scan.setFilter(filter);
            ResultScanner resultScan = table.getScanner(scan);
            try {
                for (Result res : resultScan) {
                    log.info("<" + res);
                }
            } finally {
                resultScan.close();
            }
        } catch (IOException e) {
            log.error(e);
        } finally {
            if (table != null) {
                try {
                    table.close();
                } catch (IOException e) {
                    log.error(e);
                }
            }
        }
    }

过滤器组合

可以将上面的多个过滤器放在一个List中，然后以多个过滤器组合的形式进行过滤。

    /**
     * Demonstrates combining several filters with a FilterList on "testtable".
     *
     * Two filters are collected in a list: a SingleColumnValueFilter requiring
     * col1:name to equal "x", and a RowFilter requiring the row key to differ
     * from "row2". The list is wrapped in a FilterList built with the
     * single-argument constructor and applied to a full-table scan; every
     * result is written to the log.
     * NOTE(review): with this constructor the combining operator is the
     * library default (presumably MUST_PASS_ALL) — confirm for the HBase
     * version in use.
     *
     * The scanner is closed after iteration and the table is closed in a
     * finally block. Failures are logged at error level, matching the other
     * filter demos.
     */
    public static void testFilterList() {
        List<Filter> filterList = new ArrayList<Filter>();
        Filter filter1 = new SingleColumnValueFilter("col1".getBytes(),
                "name".getBytes(), CompareOp.EQUAL, "x".getBytes());
        filterList.add(filter1);
        Filter filter2 = new RowFilter(CompareOp.NOT_EQUAL,
                new BinaryComparator("row2".getBytes()));
        filterList.add(filter2);

        FilterList filters = new FilterList(filterList);
        Scan scan = new Scan();
        scan.setFilter(filters);

        HTable table = null;
        try {
            table = new HTable(config, "testtable");
            ResultScanner result = table.getScanner(scan);
            try {
                for (Result res : result) {
                    log.info(res);
                }
            } finally {
                result.close();
            }
        } catch (IOException e) {
            log.error(e);
        } finally {
            if (table != null) {
                try {
                    table.close();
                } catch (IOException e) {
                    log.error(e);
                }
            }
        }
    }

转载注明出处：http://blog.csdn.net/wangyang1354/article/details/53761559

人生的大部份时间里，承诺同义词是束缚，奈何我们向往束缚。

相关文章：

你感兴趣的文章：

标签云：