Skip to content

Commit

Permalink
Vector search should be able to restrict on clustering keys when filt…
Browse files Browse the repository at this point in the history
…ering isn't required

 patch by Mick Semb Wever; reviewed by Caleb Rackliffe for CASSANDRA-19544
  • Loading branch information
michaelsembwever committed Apr 10, 2024
1 parent 7c29439 commit cada1a1
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
5.0-beta2
* Vector search can restrict on clustering keys when filtering isn't required (CASSANDRA-19544)
* Fix FBUtilities' parsing of gcp cos_containerd kernel versions (CASSANDRA-18594)
* Clean up KeyRangeIterator classes (CASSANDRA-19428)
* Warn clients about possible consistency violations for filtering queries against multiple mutable columns (CASSANDRA-19489)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,15 @@ else if (relation.isLIKE())
var nonIndexedColumns = Stream.concat(nonAnnColumns.stream(), clusteringColumns.stream())
.filter(c -> indexRegistry.listIndexes().stream().noneMatch(i -> i.dependsOn(c)))
.collect(Collectors.toList());

if (!nonIndexedColumns.isEmpty())
throw invalidRequest(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);
{
// restrictions on non-clustering columns, or clusterings that still need filtering, are invalid
if (!clusteringColumns.containsAll(nonIndexedColumns)
|| partitionKeyRestrictions.hasUnrestrictedPartitionKeyComponents(table)
|| clusteringColumnsRestrictions.needFiltering())
throw invalidRequest(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);
}
}
}
else
Expand Down Expand Up @@ -467,7 +474,7 @@ public boolean isColumnRestrictedByEq(ColumnMetadata columnDef)

/**
* This method determines whether a specified column is restricted on equality or something equivalent, like IN.
* It can be used in conjunction with the columns selected by a query to determine which of those columns is
* It can be used in conjunction with the columns selected by a query to determine which of those columns is
* already bound by the client (and from its perspective, not retrieved by the database).
*
* @param column a column from the same table these restrictions are against
Expand Down Expand Up @@ -779,8 +786,8 @@ public RowFilter getRowFilter(IndexRegistry indexRegistry, QueryOptions options)
if (filterRestrictions.isEmpty())
return RowFilter.none();

// If there is only one replica, we don't need reconciliation at any consistency level.
boolean needsReconciliation = !table.isVirtual()
// If there is only one replica, we don't need reconciliation at any consistency level.
boolean needsReconciliation = !table.isVirtual()
&& options.getConsistency().needsReconciliation()
&& Keyspace.open(table.keyspace).getReplicationStrategy().getReplicationFactor().allReplicas > 1;

Expand Down Expand Up @@ -1041,7 +1048,7 @@ public boolean returnStaticContentOnPartitionWithNoRows()
// a full partition query, then we include that content.
return queriesFullPartitions();
}

@Override
public String toString()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement
"/ LOCAL_ONE / NODE_LOCAL. Consistency level %s was requested. " +
"Downgrading the consistency level to %s.";
public static final String TOPK_PAGE_SIZE_WARNING = "Top-K queries do not support paging and the page size is set to %d, " +
"which is less than LIMIT %d. The page size has been set to %<d to match the LIMIT.";
"which is less than LIMIT %d. The page size has been set to %d to match the LIMIT.";

public final VariableSpecifications bindVariables;
public final TableMetadata table;
Expand Down Expand Up @@ -325,7 +325,7 @@ public ResultMessage.Rows execute(QueryState state, QueryOptions options, long q
pageSize = limit.count();
limit = getDataLimits(userLimit, userPerPartitionLimit, pageSize, aggregationSpec);
options = QueryOptions.withPageSize(options, pageSize);
ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING, oldPageSize, limit.count()));
ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING, oldPageSize, limit.count(), pageSize));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ public void endTestSuite(final JUnitTest suite) throws BuildException {
{
// only include properties and system-out if there's failure/error
rootElement.appendChild(propsElement);
rootElement.appendChild(systemOutputElement);
if (null != systemOutputElement)
rootElement.appendChild(systemOutputElement);
}
if (out != null) {
Writer wri = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,42 @@ public void cannotCreateIndexOnNonFloatVector()
.isInstanceOf(InvalidRequestException.class).hasRootCauseMessage(StorageAttachedIndex.VECTOR_NON_FLOAT_ERROR);
}

/**
 * Checks which WHERE restrictions on primary key columns are accepted alongside
 * an {@code ORDER BY v ANN OF ...} query, as the set of indexed columns changes.
 *
 * The table uses a composite partition key {@code (a, b)} and clustering columns
 * {@code c, d}; the vector column {@code v} is always indexed. A single row is
 * inserted, so every accepted query is expected to return exactly one row.
 */
@Test
public void canOrderWithWhereOnPrimaryColumns() throws Throwable
{
createTable("CREATE TABLE %s (a int, b int, c int, d int, v vector<float, 2>, PRIMARY KEY ((a,b),c,d))");
createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex'");

execute("INSERT INTO %s (a, b, c, d, v) VALUES (1, 2, 1, 2, [6.0,1.0])");

// Phase 1: only v is indexed. Restricting the full partition key, optionally followed by
// c or by c and d (in clustering order), is accepted.
ResultSet result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());
result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());
result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());

// Restricting d while leaving c unrestricted is rejected with the ANN indexed-filtering message
// (presumably because skipping a clustering column would require filtering).
assertThatThrownBy(() -> executeNet("SELECT * FROM %s WHERE a = 1 AND b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1"))
.isInstanceOf(InvalidQueryException.class).hasMessage(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);

// Phase 2: additionally index c. The restricted column d is still not indexed,
// so the same query is expected to fail with the same message.
createIndex("CREATE CUSTOM INDEX c_idx ON %s(c) USING 'StorageAttachedIndex'");

assertThatThrownBy(() -> executeNet("SELECT * FROM %s WHERE a = 1 AND b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1"))
.isInstanceOf(InvalidQueryException.class).hasMessage(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);

// Phase 3: replace the index on c with an index on d.
dropIndex("DROP INDEX %s.c_idx");
createIndex("CREATE CUSTOM INDEX ON %s(d) USING 'StorageAttachedIndex'");

// With d indexed, restrictions on c alone, on c and d, on d alone (previously rejected),
// and a range restriction on c are all accepted.
result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());
result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());
result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());
result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c > 0 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
assertEquals(1, result.size());
}

@Test
public void canOnlyExecuteWithCorrectConsistencyLevel()
{
Expand Down

0 comments on commit cada1a1

Please sign in to comment.