orawin.info

Niall's Oracle Pages – Oracle Opinion since 2004

histograms

with 7 comments

This post is by way of reply to Richard Foote’s latest quiz available here Richard wants to know how many columns a histogram will be collected on given certain conditions. I believe that the answer depends on the database version (so 9i will behave differently from 10.2 and above for example). For my tests I’m using the script below:

The aim of the script is to run the suggested gather stats routine with no workload, with workloads where each query only queries one column (as per Richard’s example where clause) and where different queries query each of the columns, as per Richard’s description. I then add a single row and see what happens to the histograms.

Update

It is worth comparing the script below with a script that populates data using the mod function.


/*

File :      Histograms.sql 
Date :		Aug 2011 (just)
Purpose:	Demo the quiz in http://richardfoote.wordpress.com/2011/08/31/method_opt-size-auto-quiz-automatic-for-the-people/

*/

-- create the table
drop table afp purge;

create table afp(
	c1	number
,	c2	number
,	c3 	number);

-- now populate
begin
	for i in 1..1000000 loop
		insert into afp(c1,c2,c3) values (i,mod(i,254),mod(i,254));
	end loop;
	commit;
end;
/


prompt 'Print Version'
select banner from v$version;

exec dbms_stats.gather_table_stats(user,'AFP',method_opt => 'FOR ALL COLUMNS SIZE AUTO');
col column_name format a30
prompt 'Histogram Info with no queries'
select
	column_name
,	count(*)
from user_histograms
where 
	table_name = 'AFP'
group by 
	column_name
order by 
	column_name
;
-- run my sample multicolumn queries
-- use a pl/sql routine so as I get bind vars
-- doesn't have to be

declare
   q1count number := 10;
   q2count number := 10;
   q3count number := 10;
   retval  number;
begin
-- query 1
for i in 1..q1count loop
	select count(*) into retval
	from afp where
	c1 = round(dbms_random.value(1,100000));
end loop;

for i in 1..q2count loop
	select count(*) into retval
	from afp where
	c2 = round(dbms_random.value(1,100000));
end loop;

for i in 1..q3count loop
	select count(*) into retval
	from afp where
	c3 = round(dbms_random.value(1,100000));
end loop;

end;
/

exec dbms_stats.gather_table_stats(user,'AFP',method_opt => 'FOR ALL COLUMNS SIZE AUTO');

prompt 'Histogram Info with single col queries'
select
	column_name
,	count(*)
from user_histograms
where 
	table_name = 'AFP'
group by 
	column_name
order by 
	column_name
;

-- run my sample multicolumn queries
-- use a pl/sql routine so as I get bind vars
-- doesn't have to be

declare
   q1count number := 10;
   q2count number := 10;
   q3count number := 10;
   retval  number;
begin
-- query 1
for i in 1..q1count loop
	select count(*) into retval
	from afp where
	c1 = round(dbms_random.value(1,100000)) and c2 = i*i and c3 = round(dbms_random.normal()*i,0);
end loop;

for i in 1..q2count loop
	select count(*) into retval
	from afp where
	c3 = round(dbms_random.value(1,100000)) and c1 = i*i and c2 = round(dbms_random.normal()*i,0);
end loop;

for i in 1..q3count loop
	select count(*) into retval
	from afp where
	c2 = round(dbms_random.value(1,100000)) and c3 = i*i and c1 = round(dbms_random.normal()*i,0);
end loop;

end;
/

exec dbms_stats.gather_table_stats(user,'AFP',method_opt => 'FOR ALL COLUMNS SIZE AUTO');

prompt 'Histogram Info with queries on all columns'
select
	column_name
,	count(*)
from user_histograms
where 
	table_name = 'AFP'
group by 
	column_name
order by 
	column_name
;

prompt 'add data '

insert into afp VALUES (1000001, 42, 99999999);
commit;

exec dbms_stats.gather_table_stats(user,'AFP',method_opt => 'FOR ALL COLUMNS SIZE AUTO');
col column_name format a30
prompt 'Histogram Info with no queries'
select
	column_name
,	count(*)
from user_histograms
where 
	table_name = 'AFP'
group by 
	column_name
order by 
	column_name
;
-- run my sample multicolumn queries
-- use a pl/sql routine so as I get bind vars
-- doesn't have to be

declare
   q1count number := 10;
   q2count number := 10;
   q3count number := 10;
   retval  number;
begin
-- query 1
for i in 1..q1count loop
	select count(*) into retval
	from afp where
	c1 = round(dbms_random.value(1,100000));
end loop;

for i in 1..q2count loop
	select count(*) into retval
	from afp where
	c2 = round(dbms_random.value(1,100000));
end loop;

for i in 1..q3count loop
	select count(*) into retval
	from afp where
	c3 = round(dbms_random.value(1,100000));
end loop;

end;
/

exec dbms_stats.gather_table_stats(user,'AFP',method_opt => 'FOR ALL COLUMNS SIZE AUTO');

prompt 'Histogram Info with single col queries'
select
	column_name
,	count(*)
from user_histograms
where 
	table_name = 'AFP'
group by 
	column_name
order by 
	column_name
;

-- run my sample multicolumn queries
-- use a pl/sql routine so as I get bind vars
-- doesn't have to be

declare
   q1count number := 10;
   q2count number := 10;
   q3count number := 10;
   retval  number;
begin
-- query 1
for i in 1..q1count loop
	select count(*) into retval
	from afp where
	c1 = round(dbms_random.value(1,100000)) and c2 = i*i and c3 = round(dbms_random.normal()*i,0);
end loop;

for i in 1..q2count loop
	select count(*) into retval
	from afp where
	c3 = round(dbms_random.value(1,100000)) and c1 = i*i and c2 = round(dbms_random.normal()*i,0);
end loop;

for i in 1..q3count loop
	select count(*) into retval
	from afp where
	c2 = round(dbms_random.value(1,100000)) and c3 = i*i and c1 = round(dbms_random.normal()*i,0);
end loop;

end;
/

exec dbms_stats.gather_table_stats(user,'AFP',method_opt => 'FOR ALL COLUMNS SIZE AUTO');

prompt 'Histogram Info with queries on all columns'
select
	column_name
,	count(*)
from user_histograms
where 
	table_name = 'AFP'
group by 
	column_name
order by 
	column_name
;

My Results are as follows
11.2.0.2

BANNER
--------------------------------------------------------------------------------
Oracle Database 11g Enterprise Edition Release 11.2.0.2.0 - 64bit Production
PL/SQL Release 11.2.0.2.0 - Production
CORE    11.2.0.2.0      Production
TNS for 64-bit Windows: Version 11.2.0.2.0 - Production
NLSRTL Version 11.2.0.2.0 - Production


PL/SQL procedure successfully completed.

'Histogram Info with no queries'

COLUMN_NAME                      COUNT(*)
------------------------------ ----------
C1                                      2
C2                                      2
C3                                      2


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with single col queries'

COLUMN_NAME                      COUNT(*)
------------------------------ ----------
C1                                      2
C2                                    254
C3                                    254


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with queries on all columns'

COLUMN_NAME                      COUNT(*)
------------------------------ ----------
C1                                      2
C2                                    254
C3                                    254

'add data '

1 row created.


Commit complete.


PL/SQL procedure successfully completed.

'Histogram Info with no queries'

COLUMN_NAME                      COUNT(*)
------------------------------ ----------
C1                                      2
C2                                    254
C3                                    235


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with single col queries'

COLUMN_NAME                      COUNT(*)
------------------------------ ----------
C1                                      2
C2                                    254
C3                                    235


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with queries on all columns'

COLUMN_NAME                      COUNT(*)
------------------------------ ----------
C1                                      2
C2                                    254
C3                                    236

10.2.0.4


SQL> @test
drop table afp purge
           *
ERROR at line 1:
ORA-00942: table or view does not exist 



Table created.


PL/SQL procedure successfully completed.

'Print Version'

BANNER                                                                          
----------------------------------------------------------------                
Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - Prod                
PL/SQL Release 10.2.0.4.0 - Production                                          
CORE	10.2.0.4.0	Production                                                      
TNS for 32-bit Windows: Version 10.2.0.4.0 - Production                         
NLSRTL Version 10.2.0.4.0 - Production                                          


PL/SQL procedure successfully completed.

'Histogram Info with no queries'

COLUMN_NAME                      COUNT(*)                                       
------------------------------ ----------                                       
C1                                      2                                       
C2                                      2                                       
C3                                      2                                       


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with single col queries'

COLUMN_NAME                      COUNT(*)                                       
------------------------------ ----------                                       
C1                                      2                                       
C2                                    254                                       
C3                                    254                                       


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with queries on all columns'

COLUMN_NAME                      COUNT(*)                                       
------------------------------ ----------                                       
C1                                      2                                       
C2                                    254                                       
C3                                    254                                       

'add data '

1 row created.


Commit complete.


PL/SQL procedure successfully completed.

'Histogram Info with no queries'

COLUMN_NAME                      COUNT(*)                                       
------------------------------ ----------                                       
C1                                      2                                       
C2                                    254                                       
C3                                    254                                       


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with single col queries'

COLUMN_NAME                      COUNT(*)                                       
------------------------------ ----------                                       
C1                                      2                                       
C2                                    254                                       
C3                                    254                                       


PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

'Histogram Info with queries on all columns'

COLUMN_NAME                      COUNT(*)                                       
------------------------------ ----------                                       
C1                                      2                                       
C2                                    254                                       
C3                                    254                                       

So in 11.2.0.2 the addition of the single column tips the optimizer over from a frequency histogram to a height balanced histogram, just by virtue of performing the insert. For 10.2.0.4 no difference is seen even after running my sample queries.

Possibly Related Posts:

  • No Related Posts found

Written by Niall Litchfield

August 31st, 2011 at 2:52 pm

Posted in Uncategorized

7 Responses to 'histograms'

Subscribe to comments with RSS or TrackBack to 'histograms'.

  1. Niall,

    Do you see different results if you perform 100% sampling?
    exec dbms_stats.gather_table_stats(user,’AFP’, ESTIMATE_PERCENT=>NULL
    ,method_opt => ‘FOR ALL COLUMNS SIZE AUTO’);

    Charles Hooper

    31 Aug 11 at 3:21 pm

  2. Good point

    Re-running and will update on Richard’s site as well.

    Niall Litchfield

    31 Aug 11 at 3:39 pm

  3. Interesting because I am not getting frequency histogram.

    The following modifications were made to your script.

    create table afp as 
    select level c1, 
    mod(round(dbms_random.value(1,100000)),255) c2, 
    mod(round(dbms_random.value(1,100000)),255) c3 
    from dual connect by level 100,
    
    SELECT 
    table_name, 
    column_name, 
    num_distinct, 
    density, 
    num_buckets,
    histogram  
    FROM 
    user_tab_columns 
    WHERE table_name = 'AFP';
    

    mdinh

    31 Aug 11 at 6:14 pm

  4. 11.2.0.2.0 – 64bit Production

    ‘Histogram Info with no queries’

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ———- ———– ———— ———- ———– ———————————————
    AFP C1 1000000 .000001 1 NONE
    AFP C2 255 .003921569 1 NONE
    AFP C3 255 .003921569 1 NONE

    ‘Histogram Info with queries on all columns’

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ———- ———– ———— ———- ———– ———————————————
    AFP C1 1000000 .000001 1 NONE
    AFP C2 255 .003922055 254 HEIGHT BALANCED
    AFP C3 255 .003921957 254 HEIGHT BALANCED

    ‘Histogram Info with queries on all columns’

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ———- —————————— ———— ———- ———– ———————————————
    AFP C1 1000001 .000001 1 NONE
    AFP C2 255 .003922056 254 HEIGHT BALANCED
    AFP C3 256 .003922128 254 HEIGHT BALANCED

    mdinh

    31 Aug 11 at 6:16 pm

  5. Michael

    I’ve updated your comments with [ code language="sql" ] tags, and removed the dbms stats line. I hope that’s OK.

    Niall Litchfield

    31 Aug 11 at 8:42 pm

  6. Thanks Nial.

    Thought I post here as well.

    Using Nial’s modified script.

    Was curious as to why I was not getting Frequency histogram and it turns out that I had one too many bucket originally, 255 vs 254..

    [/code]
    create table afp as
    select
    level c1,
    mod(round(dbms_random.value(1,100000)),254) c2,
    mod(round(dbms_random.value(1,100000)),254) c3
    from dual connect by level < 1000001;

    11.2.0.2.0 - 64bit Production

    create table afp as
    select
    level c1,
    mod(round(dbms_random.value(1,100000)),254) c2,
    mod(round(dbms_random.value(1,100000)),254) c3
    from dual connect by level < 1000001;

    'Histogram Info with no queries'

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ---------- ----------- ------------ ---------- ----------- ----------------------------------
    AFP C1 1000000 .000001 1 NONE
    AFP C2 254 .003937008 1 NONE
    AFP C3 254 .003937008 1 NONE

    'Histogram Info with single col queries'

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ---------- ----------- ------------ ---------- ----------- ----------------------------------
    AFP C1 1000000 .000001 1 NONE
    AFP C2 254 .0000005 254 FREQUENCY
    AFP C3 254 .0000005 254 FREQUENCY

    'Histogram Info with queries on all columns'

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ---------- ----------- ------------ ---------- ----------- ----------------------------------
    AFP C1 1000000 .000001 1 NONE
    AFP C2 254 .0000005 254 FREQUENCY
    AFP C3 254 .0000005 254 FREQUENCY

    'add data '

    1 row created.

    Commit complete.

    'Histogram Info with no queries'

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ---------- ------------------------------ ------------ ---------- ----------- ---------------
    AFP C1 1000001 .000001 1 NONE
    AFP C2 254 .0000005 254 FREQUENCY
    AFP C3 255 .003934596 254 HEIGHT BALANCED

    'Histogram Info with single col queries'

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ---------- ------------------------------ ------------ ---------- ----------- ---------------
    AFP C1 1000001 .000001 1 NONE
    AFP C2 254 .0000005 254 FREQUENCY
    AFP C3 255 .003934596 254 HEIGHT BALANCED

    'Histogram Info with queries on all columns'

    TABLE_NAME COLUMN_NAME NUM_DISTINCT DENSITY NUM_BUCKETS HISTOGRAM
    ---------- ------------------------------ ------------ ---------- ----------- ---------------
    AFP C1 1000001 .000001 1 NONE
    AFP C2 254 .0000005 254 FREQUENCY
    AFP C3 255 .003934596 254 HEIGHT BALANCED

    mdinh

    1 Sep 11 at 1:48 pm

  7. Sorry, realized I spelled your name wrong.

    mdinh

    1 Sep 11 at 1:49 pm

Leave a Reply

%d bloggers like this: