Skip to content

Commit

Permalink
Merge pull request pingcap#13 from TiInterstellar/dumpling-col-type
Browse files Browse the repository at this point in the history
refine dumpling parquet column type
  • Loading branch information
crazycs520 authored Jan 7, 2022
2 parents 15c2585 + ba63ab8 commit 5c04525
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions dumpling/export/writer_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,16 @@ func WriteInsertInCsv(pCtx *tcontext.Context, cfg *Config, meta TableMeta, tblIR
// parquetFileLimit is a byte count equal to 64 * 1024 * 1024 (64 MiB).
// NOTE(review): the name suggests a size cap for parquet output files, but the
// code that consumes it is outside this excerpt — confirm before relying on it.
const parquetFileLimit = 64 * 1024 * 1024 // 64MB

// parquetTypeMap translates a column type name, as found in the slice returned
// by TableMeta.ColumnTypes, into the parquet type name that is written into the
// "type=" field of the per-column metadata string.
//
// Every key must appear exactly once: Go rejects duplicate constant keys in a
// map literal at compile time.
var parquetTypeMap = map[string]string{
	// Integer-valued column types.
	"TINYINT":   "INT32",
	"SMALLINT":  "INT32",
	"MEDIUMINT": "INT32",
	"INTEGER":   "INT64",
	"INT":       "INT64",
	"BIGINT":    "INT64",
	"YEAR":      "INT64",

	// Character column types.
	"VARCHAR": "BYTE_ARRAY",

	// Floating-point column types.
	"FLOAT":  "FLOAT",
	"DOUBLE": "DOUBLE",
}

func WriteInsertInParquet(pCtx *tcontext.Context, cfg *Config, meta TableMeta, tblIR TableDataIR, w storage.ExternalFileWriter) (u uint64, err error) {
Expand All @@ -412,11 +420,12 @@ func WriteInsertInParquet(pCtx *tcontext.Context, cfg *Config, meta TableMeta, t

// Build metadata that parquet needs.
md := make([]string, meta.ColumnCount())
tps := meta.ColumnTypes()
for k, v := range meta.ColumnNames() {
ot := meta.ColumnTypes()[k]
ot := tps[k]
pt, ok := parquetTypeMap[ot]
if !ok {
panic(fmt.Errorf("type %s is not supported", ot))
pt = "BYTE_ARRAY"
}
md[k] = fmt.Sprintf("name=%s, type=%s", v, pt)
}
Expand Down

0 comments on commit 5c04525

Please sign in to comment.