Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1602,42 +1602,44 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
val tbl = "SPARK_30269"
val ext_tbl = "SPARK_30269_external"
withTempDir { dir =>
withTable(tbl, ext_tbl) {
sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING)" +
"USING parquet PARTITIONED BY (ds)")
sql(
s"""
| CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING)
| USING PARQUET
| PARTITIONED BY (ds)
| LOCATION '${dir.toURI}'
withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "false") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for ensuring this (although this is the default, as you mentioned, @pan3793).

withTable(tbl, ext_tbl) {
sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING)" +
"USING parquet PARTITIONED BY (ds)")
sql(
s"""
| CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING)
| USING PARQUET
| PARTITIONED BY (ds)
| LOCATION '${dir.toURI}'
""".stripMargin)

Seq(tbl, ext_tbl).foreach { tblName =>
sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")

val expectedSize = 690
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we compare with the size as unexpected before insertion instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When spark.sql.statistics.size.autoUpdate.enabled is false (the default value), table stats are None until ANALYZE TABLE ... is executed.

I updated the test to reflect that.

// analyze table
sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
var tableStats = getTableStats(tblName)
assert(tableStats.sizeInBytes == expectedSize)
assert(tableStats.rowCount.isEmpty)

sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
tableStats = getTableStats(tblName)
assert(tableStats.sizeInBytes == expectedSize)
assert(tableStats.rowCount.get == 1)

// analyze a single partition
sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN")
var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
assert(partStats.sizeInBytes == expectedSize)
assert(partStats.rowCount.isEmpty)

sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS")
partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
assert(partStats.sizeInBytes == expectedSize)
assert(partStats.rowCount.get == 1)
Seq(tbl, ext_tbl).foreach { tblName =>
sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
assert(getCatalogTable(tblName).stats.isEmpty)

// analyze table
sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
var tableStats = getTableStats(tblName)
val expectedSize = tableStats.sizeInBytes
assert(tableStats.rowCount.isEmpty)

sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS")
tableStats = getTableStats(tblName)
assert(tableStats.sizeInBytes == expectedSize)
assert(tableStats.rowCount.get == 1)

// analyze a single partition
sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN")
var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
assert(partStats.sizeInBytes == expectedSize)
assert(partStats.rowCount.isEmpty)

sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS")
partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13"))
assert(partStats.sizeInBytes == expectedSize)
assert(partStats.rowCount.get == 1)
}
}
}
}
Expand Down