1. ozone cli 기초
- volume 리스트 확인
$ kinit -kt ozone.keytab om/`hostname`@GOODMIT.COM
$ ozone sh volume list
[ {
"metadata" : { },
"name" : "s3v",
"admin" : "om",
"owner" : "om",
"quotaInBytes" : -1,
"quotaInNamespace" : -1,
"usedNamespace" : 1,
"creationTime" : "2024-04-26T05:13:02.161Z",
"modificationTime" : "2024-04-26T05:13:02.161Z",
"acls" : [ {
"type" : "USER",
"name" : "om",
"aclScope" : "ACCESS",
"aclList" : [ "ALL" ]
} ],
"refCount" : 0
} ]
- bucket 리스트 확인
$ ozone sh bucket list o3://ygbaek07.gitcluster.com:9862/s3v/ <- 호스트:포트로 접근할 때는 OM 리더 노드를 확인해서 지정해야 한다
[ {
"metadata" : { },
"volumeName" : "s3v",
"name" : "cloudera-health-monitoring-ozone-basic-canary-bucket",
"storageType" : "DISK",
"versioning" : false,
"usedBytes" : 189,
"usedNamespace" : 1,
"creationTime" : "2024-04-26T05:36:47.158Z",
"modificationTime" : "2024-04-26T05:36:47.158Z",
"sourcePathExist" : true,
"quotaInBytes" : -1,
"quotaInNamespace" : -1,
"bucketLayout" : "FILE_SYSTEM_OPTIMIZED",
"owner" : "hue",
"link" : false
} ]
- volume 생성
$ ozone sh volume create --quota=<volumecapacity> --user=<username> URI
$ ozone sh volume create --user=gwadmin o3://ozone1/ice/
$ ozone sh bucket list o3://ozone1/s3v/ <- 서비스 네임(ozone1)으로 접근하면 OM 리더를 확인할 필요가 없다
[ {
"metadata" : { },
"volumeName" : "s3v",
"name" : "cloudera-health-monitoring-ozone-basic-canary-bucket",
"storageType" : "DISK",
"versioning" : false,
"usedBytes" : 0,
"usedNamespace" : 0,
"creationTime" : "2024-04-26T05:36:47.158Z",
"modificationTime" : "2024-04-26T05:36:47.158Z",
"sourcePathExist" : true,
"quotaInBytes" : -1,
"quotaInNamespace" : -1,
"bucketLayout" : "FILE_SYSTEM_OPTIMIZED",
"owner" : "hue",
"link" : false
} ]
- bucket 생성
$ ozone sh bucket create o3://ozone1/ice/icebucket
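- iceberg 테이블 데이터가 저장될 경로 생성 (ofs 경로는 ofs://<서비스>/<volume>/<bucket>/... 구조이므로 아래 경로의 bucket은 위에서 만든 icebucket이 아니라 warehouse임. 해당 bucket이 없으면 mkdir -p 시 함께 생성되는 것으로 보이며, icebucket을 쓰려면 ofs://ozone1/ice/icebucket/... 으로 지정)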
# 예시 ofs://omservice/volume1/bucket1/dir1/key1
$ ozone fs -mkdir -p ofs://ozone1/ice/warehouse/iceberg/default
2. impala 이용한 iceberg 테이블 생성
CREATE TABLE iceberg_table(
id int,
value string)
STORED BY ICEBERG
LOCATION 'ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table';
- ozone에 생성된 테이블 디렉토리 구조 확인
$ ozone fs -ls -C -R ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/data
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/data/234a038e5c28783f-69bcc54500000000_1438460043_data.0.parq
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/metadata
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/metadata/00000-8c0a66d9-3d17-43e7-8d99-b45c2fce3192.metadata.json
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/metadata/00001-1ea26119-35d3-43c2-82ae-ae09e08264fa.metadata.json
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/metadata/03106ac0-5262-4241-8cc9-a74ff54bcf42-m0.avro
ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table/metadata/snap-2112183499879079001-1-03106ac0-5262-4241-8cc9-a74ff54bcf42.avro
- iceberg 테이블 파일 구조
Metadata file(metadata.json) – Contains table state including table schema, partitioning spec, and list of snapshots.
Manifest list(snap-*.avro) – Snapshots of table state are taken whenever the state is modified by table operations. A manifest list is generated for each snapshot to keep track of the manifest files associated with the snapshot.
Manifest file(*-m0.avro) – Contains the list of data files and the range of partition values in those data files. Data files tracked by multiple manifest files can be part of a snapshot.
- 'format-version' = '2' 로 테이블 생성
CREATE TABLE iceberg_table2(
id int,
value string)
STORED BY ICEBERG
LOCATION 'ofs://ozone1/ice/warehouse/iceberg/default/iceberg_table2'
TBLPROPERTIES ('format-version' = '2');
'format-version' = '2' --> copy on write 또는 merge on read
default 는 'format-version' = '1' --> copy on write 만
참고 https://bigdataenthusiast.medium.com/apache-iceberg-table-formats-bf0c2c09b389
Apache Iceberg Table Format Versions
In this blog we will explore mainly these things.
bigdataenthusiast.medium.com
2-1. TIME TRAVEL
INSERT INTO iceberg_table2 VALUES (1,'a'), (2,'b');
DESCRIBE HISTORY iceberg_table2;
INSERT INTO iceberg_table2 VALUES (3,'c'), (4,'d');
DESCRIBE HISTORY iceberg_table2;
- 두 번째 insert 이전 시점으로 select (시각과 snapshot_id는 DESCRIBE HISTORY 결과에서 확인한 값으로 바꿔서 사용)
SELECT * FROM iceberg_table2 FOR SYSTEM_TIME AS OF '2024-04-29 16:12:00';
-- OR
SELECT * FROM iceberg_table2 FOR SYSTEM_VERSION AS OF <snapshot_id>;
- 현재 시점 select
SELECT * FROM iceberg_table2;
2-2. PARTITION EVOLUTION
CREATE TABLE iceberg_weblogs (
`time` timestamp,
app string,
request string,
response_code int)
PARTITIONED BY SPEC(day(`time`))
STORED BY ICEBERG
LOCATION 'ofs://ozone1/ice/warehouse/iceberg/default/iceberg_weblogs';
- 데이터 insert
INSERT INTO iceberg_weblogs VALUES('2023-01-17 18:35:49', 'metastore', 'GET /metastore/table/default/sample_07 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-17 18:50:12', 'search', 'GET /search/?collection=10000001 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-17 19:10:30', 'metastore', 'GET /metastore/table/default/sample_07 HTTP/1.1', 200);
- 디렉토리 구조 확인
$ ozone fs -ls -C -R ofs://ozone1/ice/warehouse/iceberg/default/iceberg_weblogs
- 파티션 변경
ALTER TABLE iceberg_weblogs
SET PARTITION SPEC(day(`time`), app);
- 추가 데이터 insert
INSERT INTO iceberg_weblogs VALUES('2023-01-18 18:35:49', 'metastore', 'GET /metastore/table/default/sample_07 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-18 18:50:12', 'search', 'GET /search/?collection=10000001 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-18 19:10:30', 'metastore', 'GET /metastore/table/default/sample_07 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-19 18:35:49', 'metastore', 'GET /metastore/table/default/sample_07 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-19 18:50:12', 'search', 'GET /search/?collection=10000001 HTTP/1.1', 200);
INSERT INTO iceberg_weblogs VALUES('2023-01-19 19:10:30', 'metastore', 'GET /metastore/table/default/sample_07 HTTP/1.1', 200);
- 디렉토리 구조 확인
$ ozone fs -ls -C -R ofs://ozone1/ice/warehouse/iceberg/default/iceberg_weblogs
파티션 스펙을 변경한 이후에 insert된 데이터는 변경된 파티션 스펙(day(`time`), app)에 맞는 하위 디렉토리가 새로 생성되어 저장되고, 변경 전에 들어온 데이터는 기존 파티션 구조 그대로 유지됨
'Hadoop Eco' 카테고리의 다른 글
local airflow, cloudera spark(yarn) 연결 테스트 (0) | 2025.04.28 |
---|---|
HBase, Phoenix tutorial (0) | 2024.05.06 |
Kudu migration 방안 (0) | 2024.04.15 |
Hive Metastore에서 테이블 리스트 추출 (0) | 2024.04.05 |
impala 통계 정보 (0) | 2024.04.01 |