Skip to content

Commit

Permalink
Support dictionary encoding for STRING data type. (#238)
Browse files (browse the repository at this point in the history)
* Support dictionary encoding for STRING data type.
Move datatype-encoding map into TsFile.

* update method
  • Loading branch information
jt2594838 committed Sep 11, 2024
1 parent a90fe9a commit a9938ee
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ public static class Dictionary extends TSEncodingBuilder {

@Override
public Encoder getEncoder(TSDataType type) {
if (type == TSDataType.TEXT) {
if (type == TSDataType.TEXT || type == TSDataType.STRING) {
return new DictionaryEncoder();
}
throw new UnSupportedDataTypeException("DICTIONARY doesn't support data type: " + type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@

package org.apache.tsfile.file.metadata.enums;

import org.apache.tsfile.enums.TSDataType;

import java.util.EnumMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public enum TSEncoding {
PLAIN((byte) 0),
DICTIONARY((byte) 1),
Expand All @@ -37,6 +44,55 @@ public enum TSEncoding {
RLBE((byte) 13);
private final byte type;

/**
 * Lookup table from a data type to the set of encodings registered for it.
 *
 * <p>The map is an {@link EnumMap} keyed by {@link TSDataType} and is filled by this enum's static
 * initializer. Both the map and the sets stored in it are plain mutable collections, and the
 * field is public, so callers are able to modify them; the Sonar rule about public mutable
 * static fields is suppressed on purpose (see the trailing comment).
 *
 * <p>A data type that was never registered has no entry, so {@code get} returns {@code null} for
 * it.
 */
@SuppressWarnings("java:S2386") // used by other projects
public static final Map<TSDataType, Set<TSEncoding>> TYPE_SUPPORTED_ENCODINGS =
new EnumMap<>(TSDataType.class);

// Fills TYPE_SUPPORTED_ENCODINGS. Data types of the same family deliberately share one Set
// instance (e.g. INT32/INT64/TIMESTAMP/DATE all map to the same set object).
static {
  // BOOLEAN
  final Set<TSEncoding> booleanEncodings = new HashSet<>();
  for (TSEncoding candidate : new TSEncoding[] {TSEncoding.PLAIN, TSEncoding.RLE}) {
    booleanEncodings.add(candidate);
  }
  TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BOOLEAN, booleanEncodings);

  // Integer-like types: INT32, INT64, TIMESTAMP, DATE
  final TSEncoding[] integerCandidates = {
    TSEncoding.PLAIN,
    TSEncoding.RLE,
    TSEncoding.TS_2DIFF,
    TSEncoding.GORILLA,
    TSEncoding.ZIGZAG,
    TSEncoding.CHIMP,
    TSEncoding.SPRINTZ,
    TSEncoding.RLBE
  };
  final Set<TSEncoding> integerEncodings = new HashSet<>();
  for (TSEncoding candidate : integerCandidates) {
    integerEncodings.add(candidate);
  }
  final TSDataType[] integerTypes = {
    TSDataType.INT32, TSDataType.INT64, TSDataType.TIMESTAMP, TSDataType.DATE
  };
  for (TSDataType dataType : integerTypes) {
    TYPE_SUPPORTED_ENCODINGS.put(dataType, integerEncodings);
  }

  // Floating-point types: FLOAT, DOUBLE
  final TSEncoding[] floatingCandidates = {
    TSEncoding.PLAIN,
    TSEncoding.RLE,
    TSEncoding.TS_2DIFF,
    TSEncoding.GORILLA_V1,
    TSEncoding.GORILLA,
    TSEncoding.CHIMP,
    TSEncoding.SPRINTZ,
    TSEncoding.RLBE
  };
  final Set<TSEncoding> floatingEncodings = new HashSet<>();
  for (TSEncoding candidate : floatingCandidates) {
    floatingEncodings.add(candidate);
  }
  for (TSDataType dataType : new TSDataType[] {TSDataType.FLOAT, TSDataType.DOUBLE}) {
    TYPE_SUPPORTED_ENCODINGS.put(dataType, floatingEncodings);
  }

  // Character types: TEXT, STRING
  final Set<TSEncoding> characterEncodings = new HashSet<>();
  for (TSEncoding candidate : new TSEncoding[] {TSEncoding.PLAIN, TSEncoding.DICTIONARY}) {
    characterEncodings.add(candidate);
  }
  for (TSDataType dataType : new TSDataType[] {TSDataType.TEXT, TSDataType.STRING}) {
    TYPE_SUPPORTED_ENCODINGS.put(dataType, characterEncodings);
  }

  // BLOB: only PLAIN is registered
  final Set<TSEncoding> blobEncodings = new HashSet<>();
  blobEncodings.add(TSEncoding.PLAIN);
  TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BLOB, blobEncodings);
}

/**
 * Creates an encoding constant that carries the given byte code.
 *
 * @param type the byte value stored in this constant's {@code type} field
 */
TSEncoding(byte type) {
this.type = type;
}
Expand Down Expand Up @@ -84,6 +140,14 @@ private static TSEncoding getTsEncoding(byte encoding) {
}
}

/**
 * Tells whether {@code encoding} is registered for {@code type} in
 * {@link #TYPE_SUPPORTED_ENCODINGS}.
 *
 * <p>A data type that has no entry in the map (including a {@code null} type) is reported as
 * unsupported rather than causing a {@link NullPointerException}.
 *
 * @param type the data type to look up
 * @param encoding the encoding to test
 * @return {@code true} if the set registered for {@code type} contains {@code encoding};
 *     {@code false} otherwise, or when nothing is registered for {@code type}
 */
public static boolean isSupported(TSDataType type, TSEncoding encoding) {
  // EnumMap.get yields null for keys that were never put (and for a null key).
  Set<TSEncoding> registered = TYPE_SUPPORTED_ENCODINGS.get(type);
  return registered != null && registered.contains(encoding);
}

/**
 * Tells whether this encoding is registered for {@code type} in
 * {@link #TYPE_SUPPORTED_ENCODINGS}.
 *
 * <p>A data type that has no entry in the map (including a {@code null} type) is reported as
 * unsupported rather than causing a {@link NullPointerException}.
 *
 * @param type the data type to look up
 * @return {@code true} if the set registered for {@code type} contains this encoding;
 *     {@code false} otherwise, or when nothing is registered for {@code type}
 */
public boolean isSupported(TSDataType type) {
  // EnumMap.get yields null for keys that were never put (and for a null key).
  Set<TSEncoding> registered = TYPE_SUPPORTED_ENCODINGS.get(type);
  return registered != null && registered.contains(this);
}

/**
 * Returns the size, in bytes, reported for a serialized encoding.
 *
 * @return {@link Byte#BYTES}, i.e. one byte (presumably because an encoding is written as its
 *     single byte code; confirm against the serializer)
 */
public static int getSerializedSize() {
return Byte.BYTES;
}
Expand Down

0 comments on commit a9938ee

Please sign in to comment.