iceberg初实践
2.在spark安装目录的jars文件夹下放入iceberg的runtime jar包
2.在spark bin目录下运行./spark-shell,执行下面代码
// Demo: create an Iceberg table through a HadoopCatalog, append sample rows,
// then read them back — both via the DataFrame path API and via Spark SQL.
// Intended to be pasted into spark-shell, so `spark` and spark.implicits._
// (needed for .toDF) are already in scope.
import org.apache.hadoop.conf.Configuration
import org.apache.iceberg.hadoop.HadoopCatalog
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.spark.SparkSchemaUtil

// HadoopCatalog keeps table metadata directly under an HDFS warehouse path.
val conf = new Configuration
val warehousePath = "hdfs://dc1:8020/..."
val catalog = new HadoopCatalog(conf, warehousePath)

// Table identifier maps to <warehousePath>/default/test on HDFS.
val name = TableIdentifier.of("default", "test")

// Build sample data and derive the Iceberg schema from the Spark schema.
val data = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data")
val schema = SparkSchemaUtil.convert(data.schema)
val table = catalog.createTable(name, schema)

// Append the sample rows BEFORE reading, so the queries below actually show
// data. (The original script read the freshly-created table first and
// therefore displayed an empty result; the append at the end was never seen.)
data.write.format("iceberg").mode("append").save("hdfs://dc1:8020/.../default/test")

// Read the table back by path and inspect schema and contents.
val df = spark.read.format("iceberg").load("hdfs://dc1:8020/.../default/test")
df.printSchema
df.show()

// Expose the DataFrame as a temp view so it can be queried with SQL.
df.createTempView("test")
spark.sql("select * from test").show()