/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.PriorityQueue;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;

/**
 * This is an optimized version of the standard FuzzyRowFilter. It filters data based on a fuzzy
 * row key and performs fast-forwards during scanning. It takes pairs (row key, fuzzy info) to match
 * row keys, where fuzzy info is a byte array with 0 or 1 as its values:
 * <ul>
 * <li>0 - means that this byte in the provided row key is fixed, i.e. the row key's byte at the
 * same position must match</li>
 * <li>1 - means that this byte in the provided row key is NOT fixed, i.e. the row key's byte at
 * this position can be different from the one in the provided row key</li>
 * </ul>
 * Example:
 * <p>
 * Let's assume the row key format is userId_actionId_year_month. The length of userId is fixed and
 * is 4, the length of actionId is 2, and year and month are 4 and 2 bytes long respectively.
 * <p>
 * Let's assume that we need to fetch all users that performed a certain action (encoded as "99") in
 * Jan of any year. Then the pair (row key, fuzzy info) would be the following:
 *
 * <pre>
 * row key = "????_99_????_01" (one can use any value instead of "?")
 * fuzzy info = "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00"
 * </pre>
 *
 * I.e. fuzzy info tells that the matching mask is "????_99_????_01", where ? can be any value.
 */
@InterfaceAudience.Public
public class FuzzyRowFilter extends FilterBase implements HintingFilter {
  private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned();

  // The wildcard byte is 1 on the user side, but the filter converts it internally
  // in preprocessMask. This was changed in HBASE-15676 due to a bug with using 0.
  // In v1, the 1 byte gets converted to 0.
  // In v2, the 1 byte gets converted to 2.
  // We support both here to ensure backwards compatibility between client and server.
  static final byte V1_PROCESSED_WILDCARD_MASK = 0;
  static final byte V2_PROCESSED_WILDCARD_MASK = 2;

  private final byte processedWildcardMask;
  private final List<Pair<byte[], byte[]>> fuzzyKeysData;
  // Used to record whether we want to skip the current row.
  // Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey
  // returns true, we will just skip to next row, instead of calling getNextCellHint to determine
  // the actual next row, so we need to implement filterCell and return SEEK_NEXT_USING_HINT to let
  // upper layer call getNextCellHint.
  // And if we do not implement filterRow, sometimes we will get incorrect result when using
  // FuzzyRowFilter together with other filters, please see the description for HBASE-26967 for more
  // details.
  private boolean filterRow;
  // Set once the row tracker has no candidate rows left; reported through filterAllRemaining().
  private boolean done = false;

  /**
   * The index of the last successfully matched fuzzy key (in fuzzyKeysData). We start matching the
   * next cell with this one. If they do not match then we fall back to the one-by-one iteration
   * over fuzzyKeysData.
   */
  private int lastFoundIndex = -1;

  /**
   * Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned)
   */
  private final RowTracker tracker;

  /**
   * Client side constructor. It ensures that all client-constructed FuzzyRowFilters use the new v2
   * mask.
   * @param fuzzyKeysData pairs of (row key, fuzzy info); both arrays of a pair must have the same
   *                      length. The arrays are copied, so the caller's data is not modified.
   * @throws IllegalArgumentException if the two arrays of any pair differ in length
   */
  public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
    this(fuzzyKeysData, V2_PROCESSED_WILDCARD_MASK);
  }

  // This constructor is only used internally here, when parsing from protos on the server side.
  // It exists to enable seamless migration from v1 to v2.
  // Additionally used in tests, but never used on client side.
  FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData, byte processedWildcardMask) {
    Objects.requireNonNull(fuzzyKeysData, "fuzzyKeysData");
    this.processedWildcardMask = processedWildcardMask;

    List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());

    for (Pair<byte[], byte[]> aFuzzyKeysData : fuzzyKeysData) {
      if (aFuzzyKeysData.getFirst().length != aFuzzyKeysData.getSecond().length) {
        Pair<String, String> readable = new Pair<>(Bytes.toStringBinary(aFuzzyKeysData.getFirst()),
          Bytes.toStringBinary(aFuzzyKeysData.getSecond()));
        throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
      }

      Pair<byte[], byte[]> p = new Pair<>();
      // create a copy of pair bytes so that they are not modified by the filter.
      p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length));
      p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length));

      // update mask ( 0 -> -1 (0xff), 1 -> [0 or 2 depending on processedWildcardMask value])
      p.setSecond(preprocessMask(p.getSecond()));
      preprocessSearchKey(p);

      fuzzyKeyDataCopy.add(p);
    }
    // The tracker reads fuzzyKeysData in its constructor, so the field must be assigned first.
    this.fuzzyKeysData = fuzzyKeyDataCopy;
    this.tracker = new RowTracker();
  }

  /**
   * Zeroes the search key bytes at wildcard positions so that the word-wise comparison
   * {@code (row & mask) == key} in {@link #satisfies} can succeed. Does nothing on platforms
   * without unaligned access.
   */
  private void preprocessSearchKey(Pair<byte[], byte[]> p) {
    if (!UNSAFE_UNALIGNED) {
      // do nothing
      return;
    }
    byte[] key = p.getFirst();
    byte[] mask = p.getSecond();
    for (int i = 0; i < mask.length; i++) {
      // set non-fixed part of a search key to 0.
      if (mask[i] == processedWildcardMask) {
        key[i] = 0;
      }
    }
  }

  /**
   * Converts the user-facing mask (0 = fixed, 1 = wildcard) in place into the internal form: -1
   * (0xff) for fixed positions and {@link #processedWildcardMask} (0 for v1, 2 for v2) for wildcard
   * positions. A mask that is already in the internal form is returned untouched, and nothing is
   * done on platforms without unaligned access.
   * @return the (possibly modified) mask array that was passed in
   */
  private byte[] preprocessMask(byte[] mask) {
    if (!UNSAFE_UNALIGNED) {
      // do nothing
      return mask;
    }
    if (isPreprocessedMask(mask)) {
      return mask;
    }
    for (int i = 0; i < mask.length; i++) {
      if (mask[i] == 0) {
        mask[i] = -1; // 0 -> -1
      } else if (mask[i] == 1) {
        mask[i] = processedWildcardMask; // 1 -> 0 or 2 depending on mask version
      }
    }
    return mask;
  }

  /** Returns true if every mask byte is either -1 or {@link #processedWildcardMask}. */
  private boolean isPreprocessedMask(byte[] mask) {
    for (int i = 0; i < mask.length; i++) {
      if (mask[i] != -1 && mask[i] != processedWildcardMask) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the Fuzzy keys in the format expected by the constructor.
   * @return the Fuzzy keys in the format expected by the constructor
   */
  public List<Pair<byte[], byte[]>> getFuzzyKeys() {
    List<Pair<byte[], byte[]>> returnList = new ArrayList<>(fuzzyKeysData.size());
    for (Pair<byte[], byte[]> fuzzyKey : fuzzyKeysData) {
      Pair<byte[], byte[]> returnKey = new Pair<>();
      // This won't revert the original key's don't care values, but we don't care.
      returnKey.setFirst(Arrays.copyOf(fuzzyKey.getFirst(), fuzzyKey.getFirst().length));
      byte[] returnMask = Arrays.copyOf(fuzzyKey.getSecond(), fuzzyKey.getSecond().length);
      if (UNSAFE_UNALIGNED && isPreprocessedMask(returnMask)) {
        // Revert the preprocessing.
        for (int i = 0; i < returnMask.length; i++) {
          if (returnMask[i] == -1) {
            returnMask[i] = 0; // -1 >> 0
          } else if (returnMask[i] == processedWildcardMask) {
            returnMask[i] = 1; // 0 or 2 >> 1 depending on mask version
          }
        }
      }
      returnKey.setSecond(returnMask);
      returnList.add(returnKey);
    }
    return returnList;
  }

  /**
   * Delegates to {@link #filterCell(Cell)}.
   * @deprecated use {@link #filterCell(Cell)} instead
   */
  @Deprecated
  @Override
  public ReturnCode filterKeyValue(final Cell c) {
    return filterCell(c);
  }

  @Override
  public void reset() throws IOException {
    filterRow = false;
  }

  @Override
  public boolean filterRow() throws IOException {
    return filterRow;
  }

  /**
   * Includes the cell if its row matches any fuzzy key, trying the most recently matched key first;
   * otherwise marks the row as filtered and asks the scanner to seek using
   * {@link #getNextCellHint(Cell)}.
   */
  @Override
  public ReturnCode filterCell(final Cell c) {
    final int startIndex = Math.max(lastFoundIndex, 0);
    final int size = fuzzyKeysData.size();
    // Visit every fuzzy key exactly once, wrapping around from the last matched index.
    for (int i = startIndex; i < size + startIndex; i++) {
      final int index = i % size;
      Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
      idempotentMaskShift(fuzzyData.getSecond());
      SatisfiesCode satisfiesCode = satisfies(isReversed(), c.getRowArray(), c.getRowOffset(),
        c.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
      if (satisfiesCode == SatisfiesCode.YES) {
        lastFoundIndex = index;
        return ReturnCode.INCLUDE;
      }
    }
    // NOT FOUND -> seek next using hint
    lastFoundIndex = -1;
    filterRow = true;
    return ReturnCode.SEEK_NEXT_USING_HINT;
  }

  /**
   * Shifts every mask byte right by two bits, in place.
   */
  static void idempotentMaskShift(byte[] mask) {
    // This shift is idempotent - always end up with 0 and -1 as mask values.
    // This works regardless of mask version, because both 0 >> 2 and 2 >> 2
    // result in 0.
    // NOTE(review): when UNSAFE_UNALIGNED is false, preprocessMask leaves the user mask as 0/1 and
    // 1 >> 2 is also 0, so wildcard positions become indistinguishable from fixed positions in
    // satisfiesNoUnsafe - confirm whether that is intended.
    for (int j = 0; j < mask.length; j++) {
      mask[j] >>= 2;
    }
  }

  /**
   * Returns the first cell on the smallest candidate row tracked for the fuzzy keys, or null (and
   * marks the filter as done) when no fuzzy key can match any further row.
   */
  @Override
  public Cell getNextCellHint(Cell currentCell) {
    boolean result = tracker.updateTracker(currentCell);
    if (!result) {
      done = true;
      return null;
    }
    byte[] nextRowKey = tracker.nextRow();
    return PrivateCellUtil.createFirstOnRow(nextRowKey, 0, (short) nextRowKey.length);
  }

  /**
   * If we have multiple fuzzy keys, row tracker should improve overall performance. It calculates
   * all next rows (one per every fuzzy key) and put them (the fuzzy key is bundled) into a priority
   * queue so that the smallest row key always appears at queue head, which helps to decide the
   * "Next Cell Hint". As scanning going on, the number of candidate rows in the RowTracker will
   * remain the size of fuzzy keys until some of the fuzzy keys won't possibly have matches any
   * more.
   */
  private class RowTracker {
    private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
    private boolean initialized = false;

    RowTracker() {
      // PriorityQueue rejects an initial capacity below 1, so clamp it to keep a filter that was
      // built from an empty key list constructible (it then simply matches nothing).
      nextRows = new PriorityQueue<>(Math.max(1, fuzzyKeysData.size()),
        new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
          @Override
          public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
            Pair<byte[], Pair<byte[], byte[]>> o2) {
            // isReversed() is read on every comparison rather than captured at construction.
            return isReversed()
              ? Bytes.compareTo(o2.getFirst(), o1.getFirst())
              : Bytes.compareTo(o1.getFirst(), o2.getFirst());
          }
        });
    }

    /** Returns the candidate row at the head of the queue without removing it. */
    byte[] nextRow() {
      if (nextRows.isEmpty()) {
        throw new IllegalStateException("NextRows should not be empty, "
          + "make sure to call nextRow() after updateTracker() return true");
      } else {
        return nextRows.peek().getFirst();
      }
    }

    /**
     * Brings the queue up to date with the current cell. On the first call a candidate is computed
     * for every fuzzy key; afterwards only the candidates that the scan has already reached or
     * passed are recomputed.
     * @return true if at least one candidate row remains
     */
    boolean updateTracker(Cell currentCell) {
      if (!initialized) {
        for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
          updateWith(currentCell, fuzzyData);
        }
        initialized = true;
      } else {
        while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
          Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
          Pair<byte[], byte[]> fuzzyData = head.getSecond();
          updateWith(currentCell, fuzzyData);
        }
      }
      return !nextRows.isEmpty();
    }

    /** Returns true if the current cell's row sorts strictly before nextRowKey in scan order. */
    boolean lessThan(Cell currentCell, byte[] nextRowKey) {
      int compareResult =
        CellComparator.getInstance().compareRows(currentCell, nextRowKey, 0, nextRowKey.length);
      return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
    }

    /** Queues the next candidate row for the given fuzzy key, if one exists. */
    void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
      byte[] nextRowKeyCandidate =
        getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
          currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
      if (nextRowKeyCandidate != null) {
        nextRows.add(new Pair<>(nextRowKeyCandidate, fuzzyData));
      }
    }

  }

  @Override
  public boolean filterAllRemaining() {
    return done;
  }

  /** Returns The filter serialized using pb */
  @Override
  public byte[] toByteArray() {
    FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder()
      .setIsMaskV2(processedWildcardMask == V2_PROCESSED_WILDCARD_MASK);
    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
      BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
      bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst()));
      bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(fuzzyData.getSecond()));
      builder.addFuzzyKeysData(bbpBuilder);
    }
    return builder.build().toByteArray();
  }

  /**
   * Parse a serialized representation of {@link FuzzyRowFilter}
   * @param pbBytes A pb serialized {@link FuzzyRowFilter} instance
   * @return An instance of {@link FuzzyRowFilter} made from <code>bytes</code>
   * @throws DeserializationException if an error occurred
   * @see #toByteArray
   */
  public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
    FilterProtos.FuzzyRowFilter proto;
    try {
      proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
    } catch (InvalidProtocolBufferException e) {
      throw new DeserializationException(e);
    }
    int count = proto.getFuzzyKeysDataCount();
    ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<>(count);
    for (int i = 0; i < count; ++i) {
      BytesBytesPair current = proto.getFuzzyKeysData(i);
      byte[] keyBytes = current.getFirst().toByteArray();
      byte[] keyMeta = current.getSecond().toByteArray();
      fuzzyKeysData.add(new Pair<>(keyBytes, keyMeta));
    }
    // A message without the flag is treated as using the v1 mask.
    byte processedWildcardMask = proto.hasIsMaskV2() && proto.getIsMaskV2()
      ? V2_PROCESSED_WILDCARD_MASK
      : V1_PROCESSED_WILDCARD_MASK;
    return new FuzzyRowFilter(fuzzyKeysData, processedWildcardMask);
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder();
    sb.append("FuzzyRowFilter");
    sb.append("{fuzzyKeysData=");
    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
      sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
      sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
    }
    // Close the outer brace only; there is no further field to separate with ", ".
    sb.append('}');
    return sb.toString();
  }

  // Utility methods

  enum SatisfiesCode {
    /** row satisfies fuzzy rule */
    YES,
    /** row doesn't satisfy fuzzy rule, but there's possible greater row that does */
    NEXT_EXISTS,
    /** row doesn't satisfy fuzzy rule and there's no greater row that does */
    NO_NEXT
  }

  @InterfaceAudience.Private
  static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
    return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
  }

  @InterfaceAudience.Private
  static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
    byte[] fuzzyKeyMeta) {
    return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
  }

  /**
   * Checks whether the row matches the fuzzy key. On platforms with unaligned access the row is
   * compared in 8-, 4-, 2- and finally 1-byte chunks using {@code (row & mask) == key}; otherwise
   * the check is delegated to {@link #satisfiesNoUnsafe}. Only the first
   * {@code min(length, fuzzyKeyBytes.length)} bytes are compared.
   * @return {@link SatisfiesCode#YES} on a match (or when row is null); on this code path a
   *         mismatch is always reported as {@link SatisfiesCode#NEXT_EXISTS}
   */
  static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {

    if (!UNSAFE_UNALIGNED) {
      return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
    }

    if (row == null) {
      // do nothing, let scan to proceed
      return SatisfiesCode.YES;
    }
    length = Math.min(length, fuzzyKeyBytes.length);
    int numWords = length / Bytes.SIZEOF_LONG;

    int j = numWords << 3; // numWords * SIZEOF_LONG;

    for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
      long fuzzyBytes = Bytes.toLong(fuzzyKeyBytes, i);
      long fuzzyMeta = Bytes.toLong(fuzzyKeyMeta, i);
      long rowValue = Bytes.toLong(row, offset + i);
      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
        // We always return NEXT_EXISTS
        return SatisfiesCode.NEXT_EXISTS;
      }
    }

    int off = j;

    if (length - off >= Bytes.SIZEOF_INT) {
      int fuzzyBytes = Bytes.toInt(fuzzyKeyBytes, off);
      int fuzzyMeta = Bytes.toInt(fuzzyKeyMeta, off);
      int rowValue = Bytes.toInt(row, offset + off);
      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
        // We always return NEXT_EXISTS
        return SatisfiesCode.NEXT_EXISTS;
      }
      off += Bytes.SIZEOF_INT;
    }

    if (length - off >= Bytes.SIZEOF_SHORT) {
      short fuzzyBytes = Bytes.toShort(fuzzyKeyBytes, off);
      short fuzzyMeta = Bytes.toShort(fuzzyKeyMeta, off);
      short rowValue = Bytes.toShort(row, offset + off);
      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
        // We always return NEXT_EXISTS
        // even if it does not (in this case getNextForFuzzyRule
        // will return null)
        return SatisfiesCode.NEXT_EXISTS;
      }
      off += Bytes.SIZEOF_SHORT;
    }

    if (length - off >= Bytes.SIZEOF_BYTE) {
      int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
      int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
      int rowValue = row[offset + off] & 0xff;
      if ((rowValue & fuzzyMeta) != fuzzyBytes) {
        // We always return NEXT_EXISTS
        return SatisfiesCode.NEXT_EXISTS;
      }
    }
    return SatisfiesCode.YES;
  }

  /**
   * Byte-by-byte variant of {@link #satisfies} that interprets the mask as 0 = fixed and 1 =
   * non-fixed.
   */
  static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
    if (row == null) {
      // do nothing, let scan to proceed
      return SatisfiesCode.YES;
    }

    Order order = Order.orderFor(reverse);
    boolean nextRowKeyCandidateExists = false;

    for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
      // First, checking if this position is fixed and not equals the given one
      boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
      boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
      if (fixedByteIncorrect) {
        // in this case there's another row that satisfies fuzzy rule and bigger than this row
        if (nextRowKeyCandidateExists) {
          return SatisfiesCode.NEXT_EXISTS;
        }

        // If this row byte is less than fixed then there's a byte array bigger than
        // this row and which satisfies the fuzzy rule. Otherwise there's no such byte array:
        // this row is simply bigger than any byte array that satisfies the fuzzy rule
        boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
        if (rowByteLessThanFixed && !reverse) {
          return SatisfiesCode.NEXT_EXISTS;
        } else if (!rowByteLessThanFixed && reverse) {
          return SatisfiesCode.NEXT_EXISTS;
        } else {
          return SatisfiesCode.NO_NEXT;
        }
      }

      // Second, checking if this position is not fixed and byte value is not the biggest. In this
      // case there's a byte array bigger than this row and which satisfies the fuzzy rule. To get
      // bigger byte array that satisfies the rule we need to just increase this byte
      // (see the code of getNextForFuzzyRule below) by one.
      // Note: if non-fixed byte is already at biggest value, this doesn't allow us to say there's
      // bigger one that satisfies the rule as it can't be increased.
      // NOTE(review): the check below inspects the fuzzy key byte, while the comment above talks
      // about the row's byte - confirm which one is intended.
      if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
        nextRowKeyCandidateExists = true;
      }
    }
    return SatisfiesCode.YES;
  }

  @InterfaceAudience.Private
  static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
    return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
  }

  @InterfaceAudience.Private
  static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
    byte[] fuzzyKeyMeta) {
    return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
  }

  /** Abstracts directional comparisons based on scan direction. */
  private enum Order {
    ASC {
      @Override
      public boolean lt(int lhs, int rhs) {
        return lhs < rhs;
      }

      @Override
      public boolean gt(int lhs, int rhs) {
        return lhs > rhs;
      }

      @Override
      public byte inc(byte val) {
        // Wraps around for 0xff; the only caller in this file invokes inc() on a byte that was
        // checked with !isMax() first.
        return (byte) (val + 1);
      }

      @Override
      public boolean isMax(byte val) {
        return val == (byte) 0xff;
      }

      @Override
      public byte min() {
        return 0;
      }
    },
    DESC {
      @Override
      public boolean lt(int lhs, int rhs) {
        return lhs > rhs;
      }

      @Override
      public boolean gt(int lhs, int rhs) {
        return lhs < rhs;
      }

      @Override
      public byte inc(byte val) {
        // Wraps around for 0; the only caller in this file invokes inc() on a byte that was
        // checked with !isMax() first.
        return (byte) (val - 1);
      }

      @Override
      public boolean isMax(byte val) {
        return val == 0;
      }

      @Override
      public byte min() {
        return (byte) 0xFF;
      }
    };

    public static Order orderFor(boolean reverse) {
      return reverse ? DESC : ASC;
    }

    /** Returns true when {@code lhs} sorts before {@code rhs} in this ordering. */
    public abstract boolean lt(int lhs, int rhs);

    /** Returns true when {@code lhs} sorts after {@code rhs} in this ordering. */
    public abstract boolean gt(int lhs, int rhs);

    /** Returns {@code val} moved one step forward in this ordering (+1 for ASC, -1 for DESC). */
    public abstract byte inc(byte val);

    /** Returns true when {@code val} is the last value in this ordering. */
    public abstract boolean isMax(byte val);

    /** Returns the first value in this ordering. */
    public abstract byte min();
  }

  /**
   * Find out the closest next byte array that satisfies fuzzy rule and is after the given one. In
   * the reverse case it returns increased byte array to make sure that the proper row is selected
   * next.
   * @return byte array which is after the given row and which satisfies the fuzzy rule if it
   *         exists, null otherwise
   */
  @InterfaceAudience.Private
  static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
    byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
    // To find out the closest next byte array that satisfies fuzzy rule and is after the given one
    // we do the following:
    // 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes
    // 2. if during the first step given row did not increase, then we increase the value at
    // the first "non-fixed" position (where it is not maximum already)

    // It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position
    // values than otherwise.
    byte[] result = Arrays.copyOf(fuzzyKeyBytes, Math.max(length, fuzzyKeyBytes.length));
    if (reverse) {
      // we need 0xff's instead of 0x00's
      for (int i = 0; i < result.length; i++) {
        if (result[i] == 0) {
          result[i] = (byte) 0xFF;
        }
      }
    }
    int toInc = -1;
    final Order order = Order.orderFor(reverse);

    boolean increased = false;
    for (int i = 0; i < result.length; i++) {
      if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) {
        result[i] = row[offset + i];
        if (!order.isMax(row[offset + i])) {
          // this is "non-fixed" position and is not at max value, hence we can increase it
          toInc = i;
        }
      } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1 /* fixed */) {
        if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
          // if setting value for any fixed position increased the original array,
          // we are OK
          increased = true;
          break;
        }

        if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
          // if setting value for any fixed position makes array "smaller", then just stop:
          // in case we found some non-fixed position to increase we will do it, otherwise
          // there's no "next" row key that satisfies fuzzy rule and "greater" than given row
          break;
        }
      }
    }

    if (!increased) {
      if (toInc < 0) {
        return null;
      }
      result[toInc] = order.inc(result[toInc]);

      // Setting all "non-fixed" positions to zeroes to the right of the one we increased so
      // that found "next" row key is the smallest possible
      for (int i = toInc + 1; i < result.length; i++) {
        if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 /* non-fixed */) {
          result[i] = order.min();
        }
      }
    }

    byte[] trailingZerosTrimmed = trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
    if (reverse) {
      // In the reverse case we increase last non-max byte to make sure that the proper row is
      // selected next.
      return PrivateCellUtil.increaseLastNonMaxByte(trailingZerosTrimmed);
    } else {
      return trailingZerosTrimmed;
    }
  }

  /**
   * Cuts the hint off after the last position whose mask byte is non-zero, but never before the
   * incremented position. For a forward scanner the next cell hint should not carry trailing bytes
   * for non-fixed positions: e.g. a hint of '\x01\x01\x01\x00\x00' would skip the valid row
   * '\x01\x01\x01'.
   * @param result       the candidate row key
   * @param fuzzyKeyMeta the mask; a 0 byte marks a position that may be trimmed
   * @param toInc        position of incremented byte
   * @return trimmed version of result
   */
  private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
    int off = fuzzyKeyMeta.length >= result.length ? result.length - 1 : fuzzyKeyMeta.length - 1;
    for (; off >= 0; off--) {
      if (fuzzyKeyMeta[off] != 0) {
        break;
      }
    }
    if (off < toInc) {
      off = toInc;
    }
    byte[] retValue = new byte[off + 1];
    System.arraycopy(result, 0, retValue, 0, retValue.length);
    return retValue;
  }

  /**
   * Returns true if and only if the fields of the filter that are serialized are equal to the
   * corresponding fields in other. Used for testing.
   */
  @Override
  boolean areSerializedFieldsEqual(Filter o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof FuzzyRowFilter)) {
      return false;
    }
    FuzzyRowFilter other = (FuzzyRowFilter) o;
    if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) {
      return false;
    }
    for (int i = 0; i < fuzzyKeysData.size(); ++i) {
      Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
      Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
      if (
        !(Bytes.equals(thisData.getFirst(), otherData.getFirst())
          && Bytes.equals(thisData.getSecond(), otherData.getSecond()))
      ) {
        return false;
      }
    }
    return true;
  }

  @Override
  public boolean equals(Object obj) {
    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
  }

  /**
   * Hashes the content of every key and mask array, so that two filters that are equal according
   * to {@link #equals(Object)} (which compares array contents) also produce the same hash code.
   */
  @Override
  public int hashCode() {
    int result = 1;
    for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
      result = 31 * result + Arrays.hashCode(fuzzyData.getFirst());
      result = 31 * result + Arrays.hashCode(fuzzyData.getSecond());
    }
    return result;
  }
}