Skip to content

Commit

Permalink
address comment
Browse files Browse the repository at this point in the history
  • Loading branch information
xuyifangreeneyes committed Apr 14, 2023
1 parent d9a3aa2 commit c57ea0d
Showing 1 changed file with 22 additions and 21 deletions.
43 changes: 22 additions & 21 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,34 +159,35 @@ func (p *baseLogicalPlan) DeriveStats(childStats []*property.StatsInfo, selfSche
return profile, nil
}

// getTotalRowCount reports the total row count that was recorded when colHist
// was collected.
//
// If colHist is fully loaded, its own TotalRowCount is used. Otherwise the count
// is borrowed from another fully loaded entry of statsTbl whose LastUpdateVersion
// equals that of colHist: indices are consulted first, then columns, and the
// first match encountered while ranging wins. It returns 0 when no usable stats
// are found.
func getTotalRowCount(statsTbl *statistics.Table, colHist *statistics.Column) int64 {
	if colHist.IsFullLoad() {
		return int64(colHist.TotalRowCount())
	}

	// colHist is only partially loaded; look for sibling stats carrying the
	// same update version and take the row count from there.
	version := colHist.LastUpdateVersion
	for _, index := range statsTbl.Indices {
		if !index.IsFullLoad() || index.LastUpdateVersion != version {
			continue
		}
		return int64(index.TotalRowCount())
	}
	for _, column := range statsTbl.Columns {
		if !column.IsFullLoad() || column.LastUpdateVersion != version {
			continue
		}
		return int64(column.TotalRowCount())
	}

	// Nothing fully loaded shares colHist's version.
	return 0
}

// getColumnNDV computes estimated NDV of specified column using the original
// histogram of `DataSource` which is retrieved from storage(not the derived one).
func (ds *DataSource) getColumnNDV(colID int64) (ndv float64) {
hist, ok := ds.statisticTable.Columns[colID]
if ok && hist.IsStatsInitialized() {
ndv = float64(hist.Histogram.NDV)
// TODO: a better way to get the row count derived from the last analyze.
analyzeCount := int64(0)
if hist.IsFullLoad() {
analyzeCount = int64(hist.TotalRowCount())
} else {
for _, idx := range ds.statisticTable.Indices {
if idx.IsFullLoad() && idx.LastUpdateVersion == hist.LastUpdateVersion {
analyzeCount = int64(idx.TotalRowCount())
break
}
}
if analyzeCount == 0 {
for _, col := range ds.statisticTable.Columns {
if col.IsFullLoad() && col.LastUpdateVersion == hist.LastUpdateVersion {
analyzeCount = int64(col.TotalRowCount())
break
}
}
}
}
// TODO: a better way to get the total row count derived from the last analyze.
analyzeCount := getTotalRowCount(ds.statisticTable, hist)
if analyzeCount > 0 {
factor := float64(ds.statisticTable.RealtimeCount) / hist.TotalRowCount()
factor := float64(ds.statisticTable.RealtimeCount) / float64(analyzeCount)
ndv *= factor
}
} else {
Expand Down

0 comments on commit c57ea0d

Please sign in to comment.