confluentinc · blueedgenick · Jun 5, 2020 · Jun 4, 2020 · Jun 4, 2020 · Jun 5, 2020
@@ -156,6 +156,55 @@ Given an array, checks if a search value is contained in the array.
 
 Accepts any `ARRAY` type. The type of the second param must match the element type of the `ARRAY`.
 
+### `ARRAY_DISTINCT`
+
+```sql
+ARRAY_DISTINCT(ARRAY[1, 2, 3])
+```
+
+Returns an array of all the distinct values, including NULL if present, from the input array.
+The output array elements are in order of their first occurrence in the input.
+
+Returns NULL if the input array is NULL.
+
+Examples:
+```sql 
+ARRAY_DISTINCT(ARRAY[1, 1, 2, 3, 1, 2])  => [1, 2, 3]
+ARRAY_DISTINCT(ARRAY['apple', 'apple', NULL, 'cherry'])  => ['apple', NULL, 'cherry']
+```
+
+### `ARRAY_EXCEPT`
+
+```sql
+ARRAY_EXCEPT(array1, array2)
+```
+
+Returns an array of all the distinct elements from an array, except for those also present in a second array. The order of entries in the first array is preserved but duplicates are removed. 
+
+Returns NULL if either input is NULL.
+
+Examples:
+```sql 
+ARRAY_EXCEPT(ARRAY[1, 2, 3, 1, 2], ARRAY[2, 3])  => [1]
+ARRAY_EXCEPT(ARRAY['apple', 'apple', NULL, 'cherry'], ARRAY['cherry'])  => ['apple', NULL]
+```
+
+### `ARRAY_INTERSECT`
+
+```sql
+ARRAY_INTERSECT(array1, array2)
+```
+
+Returns an array of all the distinct elements from the intersection of both input arrays. The order of entries in the output is the same as in the first input array.
+
+Returns NULL if either input array is NULL.
+
+Examples:
+```sql 
+ARRAY_INTERSECT(ARRAY[1, 2, 3, 1, 2], ARRAY[2, 1])  => [1, 2]
+ARRAY_INTERSECT(ARRAY['apple', 'apple', NULL, 'cherry'], ARRAY['apple'])  => ['apple']
+```
+
 ### `ARRAY_LENGTH`
 
 ```sql
@@ -213,6 +262,22 @@ If the array field is NULL then NULL is returned.
 
 An optional second parameter can be used to specify whether to sort the elements in 'ASC'ending or 'DESC'ending order. If neither is specified then the default is ascending order. 
 
+### `ARRAY_UNION`
+
+```sql
+ARRAY_UNION(array1, array2)
+```
+
+Returns an array of all the distinct elements from both input arrays, in the order in which they are first encountered.
+
+Returns NULL if either input array is NULL.
+
+Examples:
+```sql 
+ARRAY_UNION(ARRAY[1, 2, 3, 1, 2], ARRAY[4, 1])  => [1, 2, 3, 4]
+ARRAY_UNION(ARRAY['apple', 'apple', NULL, 'cherry'], ARRAY['cherry'])  => ['apple', NULL, 'cherry']
+```
+
 ### `AS_MAP`
 
 ```sql

@@ -0,0 +1,47 @@
+/*
+ * Copyright 2020 Confluent Inc.
+ *
+ * Licensed under the Confluent Community License; you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.confluent.io/confluent-community-license
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the
+ * License.
+ */
+
+package io.confluent.ksql.function.udf.array;
+
+import com.google.common.collect.Sets;
+import io.confluent.ksql.function.udf.Udf;
+import io.confluent.ksql.function.udf.UdfDescription;
+import io.confluent.ksql.function.udf.UdfParameter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * UDF that strips repeated entries out of an array.
+ */
+@UdfDescription(
+    name = "array_distinct",
+    description = "Returns an array of all the distinct values, including NULL if present, from"
+        + " the input array."
+        + " The output array elements will be in order of their first occurrence in the input."
+        + " Returns NULL if the input array is NULL.")
+public class ArrayDistinct {
+
+  /**
+   * Produces a copy of {@code input} with every repeated entry removed.
+   *
+   * @param input the array to de-duplicate; may be {@code null}
+   * @param <T> the element type of the array
+   * @return a new list holding each value of {@code input} once, in the order the values first
+   *     appear, or {@code null} when {@code input} is {@code null}
+   */
+  @Udf
+  public <T> List<T> distinct(
+      @UdfParameter(description = "Array of values to distinct") final List<T> input) {
+    if (input == null) {
+      return null;
+    }
+    // A linked hash set discards repeats while remembering the order entries were added in.
+    final Set<T> seen = Sets.newLinkedHashSetWithExpectedSize(input.size());
+    seen.addAll(input);
+    return new ArrayList<>(seen);
+  }
+
+}
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2020 Confluent Inc.
+ *
+ * Licensed under the Confluent Community License; you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.confluent.io/confluent-community-license
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the
+ * License.
+ */
+
+package io.confluent.ksql.function.udf.array;
+
+import io.confluent.ksql.function.udf.Udf;
+import io.confluent.ksql.function.udf.UdfDescription;
+import io.confluent.ksql.function.udf.UdfParameter;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * UDF that subtracts the contents of one array from another.
+ */
+@UdfDescription(
+    name = "array_except",
+    description = "Returns an array of all the elements in an array except for those also present"
+        + " in a second array. The order of entries in the first array is preserved although any"
+        + " duplicates are removed. Returns NULL if either input is NULL.")
+public class ArrayExcept {
+
+  /**
+   * Computes the distinct entries of {@code left} that do not occur in {@code right}.
+   *
+   * @param left the array to take values from; may be {@code null}
+   * @param right the array of values to leave out; may be {@code null}
+   * @param <T> the element type of both arrays
+   * @return the surviving entries in the order they first appear in {@code left}, or
+   *     {@code null} when either argument is {@code null}
+   */
+  @Udf
+  public <T> List<T> except(
+      @UdfParameter(description = "Array of values") final List<T> left,
+      @UdfParameter(description = "Array of exceptions") final List<T> right) {
+    if (left == null || right == null) {
+      return null;
+    }
+    // De-duplicate the left side up front, keeping first-occurrence order.
+    final Set<T> remaining = new LinkedHashSet<>(left);
+    // Hash-based lookup of every value that has to be dropped.
+    final Set<T> excluded = new HashSet<>(right);
+    remaining.removeIf(excluded::contains);
+    return remaining.stream().collect(Collectors.toList());
+  }
+}
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2020 Confluent Inc.
+ *
+ * Licensed under the Confluent Community License; you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.confluent.io/confluent-community-license
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the
+ * License.
+ */
+
+package io.confluent.ksql.function.udf.array;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import io.confluent.ksql.function.udf.Udf;
+import io.confluent.ksql.function.udf.UdfDescription;
+import io.confluent.ksql.function.udf.UdfParameter;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * UDF that finds the values two arrays have in common.
+ */
+@UdfDescription(
+    name = "array_intersect",
+    description = "Returns an array of all the distinct elements from the intersection of both"
+        + " input arrays, or NULL if either input array is NULL. The order of entries in the"
+        + " output is the same as in the first input array.")
+public class ArrayIntersect {
+
+  /**
+   * Computes the distinct entries of {@code left} that also occur in {@code right}.
+   *
+   * @param left the array whose ordering drives the output; may be {@code null}
+   * @param right the array to match against; may be {@code null}
+   * @param <T> the element type of both arrays
+   * @return the shared entries in the order they first appear in {@code left}, or
+   *     {@code null} when either argument is {@code null}
+   */
+  @Udf
+  public <T> List<T> intersect(
+      @UdfParameter(description = "First array of values") final List<T> left,
+      @UdfParameter(description = "Second array of values") final List<T> right) {
+    if (left == null || right == null) {
+      return null;
+    }
+    final Set<T> rightValues = Sets.newHashSet(right);
+    // Start from the de-duplicated left side and discard whatever the right side lacks.
+    final Set<T> common = Sets.newLinkedHashSet(left);
+    common.removeIf(candidate -> !rightValues.contains(candidate));
+    return Lists.newArrayList(common);
+  }
+
+}
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2020 Confluent Inc.
+ *
+ * Licensed under the Confluent Community License; you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.confluent.io/confluent-community-license
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the
+ * License.
+ */
+
+package io.confluent.ksql.function.udf.array;
+
+import com.google.common.collect.Sets;
+import io.confluent.ksql.function.udf.Udf;
+import io.confluent.ksql.function.udf.UdfDescription;
+import io.confluent.ksql.function.udf.UdfParameter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * UDF that merges two arrays into a single array of distinct elements.
+ */
+@UdfDescription(
+    name = "array_union",
+    description = "Returns an array of all the distinct elements from both input arrays, "
+        + "or NULL if either array is NULL.")
+public class ArrayUnion {
+
+  /**
+   * Combines the distinct entries of {@code left} and {@code right}.
+   *
+   * @param left the array whose entries come first in the output; may be {@code null}
+   * @param right the array whose not-yet-seen entries are appended; may be {@code null}
+   * @param <T> the element type of both arrays
+   * @return a new list with each distinct value once, in the order first encountered across
+   *     {@code left} then {@code right}, or {@code null} when either argument is {@code null}
+   */
+  @Udf
+  public <T> List<T> union(
+      @UdfParameter(description = "First array of values") final List<T> left,
+      @UdfParameter(description = "Second array of values") final List<T> right) {
+    if (left == null || right == null) {
+      return null;
+    }
+    final Set<T> combined = Sets.newLinkedHashSet(left);
+    combined.addAll(right);
+    // Copy into an ArrayList rather than wrapping an Object[] with Arrays.asList: this keeps the
+    // element type without an unchecked cast and returns an ordinary resizable list.
+    return new ArrayList<>(combined);
+  }
+
+}
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2020 Confluent Inc.
+ *
+ * Licensed under the Confluent Community License; you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.confluent.io/confluent-community-license
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the
+ * License.
+ */
+
+package io.confluent.ksql.function.udf.array;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
+
+import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link ArrayDistinct}.
+ */
+public class ArrayDistinctTest {
+  private final ArrayDistinct udf = new ArrayDistinct();
+
+  /** Repeated strings collapse to their first occurrence. */
+  @Test
+  public void shouldDistinctArray() {
+    final List<String> input = Arrays.asList("foo", " ", "foo", "bar");
+    final List<String> deduped = udf.distinct(input);
+    assertThat(deduped, contains("foo", " ", "bar"));
+  }
+
+  /** An input without repeats comes back with the same entries in the same order. */
+  @Test
+  public void shouldNotChangeDistinctArray() {
+    final List<String> input = Arrays.asList("foo", " ", "bar");
+    final List<String> deduped = udf.distinct(input);
+    assertThat(deduped, contains("foo", " ", "bar"));
+  }
+
+  /** Repeated integers collapse to their first occurrence. */
+  @Test
+  public void shouldDistinctIntArray() {
+    final List<Integer> input = Arrays.asList(1, 2, 3, 2, 1);
+    final List<Integer> deduped = udf.distinct(input);
+    assertThat(deduped, contains(1, 2, 3));
+  }
+
+  /** Maps with equal contents count as the same element. */
+  @SuppressWarnings("unchecked")
+  @Test
+  public void shouldDistinctArrayOfMaps() {
+    final Map<String, Integer> first = ImmutableMap.of("foo", 1, "bar", 2, "baz", 3);
+    final Map<String, Integer> second = ImmutableMap.of("foo", 10, "baz", 3);
+    final Map<String, Integer> sameAsFirst = ImmutableMap.of("foo", 1, "bar", 2, "baz", 3);
+    final List<Map<String, Integer>> input = Arrays.asList(first, second, sameAsFirst);
+    final List<Map<String, Integer>> deduped = udf.distinct(input);
+    assertThat(deduped, contains(first, second));
+  }
+
+  /** Lists with equal contents count as the same element; a null entry is kept. */
+  @SuppressWarnings("unchecked")
+  @Test
+  public void shouldDistinctArrayOfLists() {
+    final List<String> first = Arrays.asList("foo", "bar", "baz");
+    final List<String> second = Arrays.asList("foo", "bar");
+    final List<String> sameAsFirst = Arrays.asList("foo", "bar", "baz");
+    final List<List<String>> input = Arrays.asList(first, second, sameAsFirst, null);
+    final List<List<String>> deduped = udf.distinct(input);
+    assertThat(deduped, contains(first, second, null));
+  }
+
+  /** An empty array yields an empty array. */
+  @Test
+  public void shouldReturnEmptyForEmptyInput() {
+    final List<Double> nothing = new ArrayList<Double>();
+    final List<Double> deduped = udf.distinct(nothing);
+    assertThat(deduped, is(Collections.EMPTY_LIST));
+  }
+
+  /** A null array yields null. */
+  @Test
+  public void shouldReturnNullForNullInput() {
+    final List<Double> missing = null;
+    final List<Double> deduped = udf.distinct(missing);
+    assertThat(deduped, is(nullValue()));
+  }
+
+  /** Null is treated like any other value: kept once, at its first position. */
+  @Test
+  public void shouldConsiderNullAsDistinctValue() {
+    final List<Object> input = Arrays.asList(1, 2, 1, null, 2, null, 3, 1);
+    final List<Object> deduped = udf.distinct(input);
+    assertThat(deduped, contains(1, 2, null, 3));
+  }
+
+}