Recently I faced a strange issue with EBR (Edition-Based Redefinition).
In order to understand how things work internally I decided to use DTrace.
I wanted to suspend specific user session when session acquired AE lock.
There are similar enq_trace.sh script that written by
Tanel Poder.
Tanel's script could be extended to accommodate to my specific requirements if needed.
My
enq_suspend.d script is below:
#!/usr/sbin/dtrace -s
# pragma D option quiet
# pragma D option destructive
struct ksqrs_t {
long addr;
char b1[72];
uint32_t id1;
uint32_t id2;
char b2[4];
char idt[2];/*92=offset of X$KSQRS.KSQRSIDT column from x$kqfco*/
char b3[18];
};
struct ksqrs_t ksqrs;
pid$target:oracle:ksqgtl*:entry
{
ksqeq_lkadr = arg0; /* X$KSQEQ.KSQLKADR*/
mode = arg1;
timeout = arg3;
lock_indx = arg4; /*X$KSIRESTYP.INDX*/
flags = arg7;
}
pid$target:oracle:ksqgtl*:return
{
ksqeq_lkres = *(long *)copyin(ksqeq_lkadr+8,8);
ksqrs = *(struct ksqrs_t *)copyin(ksqeq_lkres,112);
ksqrs.addr = ksqeq_lkres;
printf("%d [%Y] %s: *** %s-%08x-%08x mode=%d flags=0x%x timeout=%d\n",
timestamp,
walltimestamp,
probefunc,
ksqrs.idt,
ksqrs.id1,
ksqrs.id2,
mode,
flags,
timeout
);
}
pid$target:oracle:ksqrcl*:entry
{
enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
ksqrs.addr = enqrs_addr;
printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
timestamp,
walltimestamp,
probefunc,
ksqrs.idt,
ksqrs.id1,
ksqrs.id2,
ksqrs.addr
);
}
pid$target:oracle:ksqrcl*:entry
/ksqrs.idt==$$1/
{
printf("%d [%Y] %s.%s: lock_type=%s suspending execution\n", timestamp, walltimestamp, probefunc, probename, ksqrs.idt);
stop();
}
pid$target:oracle:ksqrcl*:return {}
A brief description of the script.
-
# pragma D option quiet
# pragma D option destructive
Minimize output, allow destructive actions. We will use "stop()" call further.
- struct ksqrs_t
struct ksqrs_t {
long addr;
char b1[72];/*unknown not needed*/
uint32_t id1;/*lock ID1*/
uint32_t id2;/*lock ID2*/
char b2[4];/*unknown not needed*/
char idt[2];/*lock type, 92=offset of X$KSQRS.KSQRSIDT column from x$kqfco*/
char b3[18];/*unknown not needed*/
};
The above structure describes X$KSQRS entry (Kernel Services enQueue ReSource):
SQL> select c.kqfconam column_name,
2 c.kqfcodty datatype,
3 c.kqfcosiz size_byte,
4 c.kqfcooff offset
5 from x$kqfta t,
6 x$kqfco c
7 where t.kqftanam = 'X$KSQRS'
8 and c.kqfcotab = t.indx
9 order by c.indx
10 /
COLUMN_NAME DATATYPE SIZE_BYTE OFFSET
----------- ---------- ---------- ----------
ADDR 23 8 0
INDX 2 4 0
INST_ID 2 4 0
KSQRSID1 2 4 80
KSQRSID2 2 4 84
KSQRSIDT 1 2 92
KSQRSFLG 2 1 111
ID1,ID2,IDT columns maps to relevant GV$LOCK columns, this can be obtained from V$FIXED_VIEW_DEFINITION:
select s.inst_id,
l.laddr,
l.kaddr,
s.ksusenum,
r.ksqrsidt,
r.ksqrsid1,
r.ksqrsid2,
l.lmode,
l.request,
l.ctime,
decode(l.lmode,0,0,l.block)
from v$_lock l,
x$ksuse s,
x$ksqrs r
where l.saddr=s.addr
and concat(USERENV('Instance'),l.raddr)=concat(r.inst_id,r.addr)
- ksqgtl.entry - get lock function, entry point
pid$target:oracle:ksqgtl*:entry
{
ksqeq_lkadr = arg0; /* X$KSQEQ.KSQLKADR*/
mode = arg1;
timeout = arg3;
lock_indx = arg4; /*X$KSIRESTYP.INDX*/
flags = arg7;
}
Interesting ksqgtl function arguments saved for further usage. In particularly, arg0 - is X$KSQEQ.KSQLKADR (lock address?).
- ksqgtl.return - get lock function, return
pid$target:oracle:ksqgtl*:return
{
ksqeq_lkres = *(long *)copyin(ksqeq_lkadr+8,8);
ksqrs = *(struct ksqrs_t *)copyin(ksqeq_lkres,112);
ksqrs.addr = ksqeq_lkres;
printf("%d [%Y] %s: *** %s-%08x-%08x mode=%d flags=0x%x timeout=%d\n",
timestamp,
walltimestamp,
probefunc,
ksqrs.idt,
ksqrs.id1,
ksqrs.id2,
mode,
flags,
timeout
);
}
We will interesting in "ID1", "ID2", "TYPE" lock attributes. We can fully decode them from X$KSQRS fixed table.
But ksqgtl called with X$KSQEQ.KSQLKADR. X$KSQEQ.KSQLKRES maps to X$KSQRS.ADDR.
SQL> select c.kqfconam column_name,
2 c.kqfcodty datatype,
3 c.kqfcosiz size_byte,
4 c.kqfcooff offset
5 from x$kqfta t,
6 x$kqfco c
7 where t.kqftanam = 'X$KSQEQ'
8 and c.kqfcotab = t.indx
9 order by c.indx
10 /
COLUMN_NAME DATATYPE SIZE_BYTE OFFSET
----------- ---------- ---------- ----------
ADDR 23 8 0
INDX 2 4 0
INST_ID 2 4 0
KSSOBFLG 2 4 0
KSSOBOWN 23 8 0
KSQLKADR 23 8 88
KSQLKRES 23 8 96
KSQLKMOD 2 1 176
KSQLKREQ 2 1 177
KSQLKMXH 2 2 178
KSQLKSES 23 8 0
KSQLKCTIM 2 4 0
KSQLKLBLK 2 4 0
We will print debug output similar to Tanel Poder script and 10704 event format.
-
pid$target:oracle:ksqrcl*:entry
{
enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
ksqrs.addr = enqrs_addr;
printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
timestamp,
walltimestamp,
probefunc,
ksqrs.idt,
ksqrs.id1,
ksqrs.id2,
ksqrs.addr
);
}
- ksqrcl function (release lock) entry point:
pid$target:oracle:ksqrcl*:entry
{
enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
ksqrs.addr = enqrs_addr;
printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
timestamp,
walltimestamp,
probefunc,
ksqrs.idt,
ksqrs.id1,
ksqrs.id2,
ksqrs.addr
);
}
We will populate a ksqrs struct again.
- If lock type equals to parameter passed to script, we will suspend process execution:
pid$target:oracle:ksqrcl*:entry
/ksqrs.idt==$$1/
{
printf("%d [%Y] %s.%s: lock_type=%s suspending execution\n", timestamp, walltimestamp, probefunc, probename, ksqrs.idt);
stop();
}
Short demonstration how this works.
Suppose I will want suspend server process when session acquires TM lock.
1. I opened new session to the DB and determine server process ID and tracefile location:
SQL> select p.spid,
2 p.tracefile
3 from v$session s,
4 v$process p
5 where s.sid = sys_context( 'userenv', 'sid')
6 and p.addr = s.paddr
7 /
SPID TRACEFILE
----- ----------------------------------------------------------------------------------------------------
1968 /pub/home/oracle/diag/rdbms/orcl/orcl/trace/orcl_ora_1968.trc
2. I created test table:
SQL> create table t (x int);
Table created.
SQL> select object_id,
2 to_char(object_id, 'fm0xxxxxxx') object_id_hex
3 from obj
4 where object_name='T';
OBJECT_ID OBJECT_ID_HEX
---------- ---------------------------
349673 000555e9
3. Now I enable additional diagnostics: event 10704 to trace enqueues:
SQL> alter session set events '10704 level 2';
Session altered.
4. Next I will run DTrace script
enq_suspend.d passing server process ID as parameter:
oracle@localhost dtrace$ ./enq_suspend.d -p 1968 TM
5. Trying to truncate table T:
Session suspending.
6. Observed output into console with DTrace script:
oracle@localhost dtrace$ ./enq_suspend.d -p 1968 TM
33969273235645237 [2015 Feb 9 12:42:21] ksqgtlctx: *** TM-000555e9-00000000 mode=6 flags=0x401 timeout=0
33969273240887878 [2015 Feb 9 12:42:21] ksqgtlctx: *** TX-0004001b-0000f92e mode=6 flags=0x401 timeout=0
33969273245689382 [2015 Feb 9 12:42:21] ksqrcl: *** TX-0004001b-0000f92e x$ksqrs.addr=0x000000041cda5c90
33969273245764458 [2015 Feb 9 12:42:21] ksqrcli: *** TX-0004001b-0000f92e x$ksqrs.addr=0x000000041cda5c90
33969273246279405 [2015 Feb 9 12:42:21] ksqrcl: *** TM-000555e9-00000000 x$ksqrs.addr=0x000000041cda25d0
33969273246301204 [2015 Feb 9 12:42:21] ksqrcl.entry: lock_type=TM suspending execution
You can see that when calling ksqrcl with TM-000555e9-00000000 (our object id) execution is suspending.
We can gather any required diagnostic data at this step.
7. 10704 event data in the trace file:
*** 2015-02-09 12:42:21.102
ksqgtl *** TM-000555e9-00000000 mode=6 flags=0x401 timeout=0 ***
ksqgtl: xcb=0x419a6f068, ktcdix=2147483647, topxcb=0x419a6f068
ktcipt(topxcb)=0x0
ksucti: init txn DID from session DID
ksqgtl:
ksqlkdid: 0001-0019-00000142
*** ksudidTrace: ksqgtl
ktcmydid(): 0001-0019-00000142
ksusesdi: 0001-0019-00000143
ksusetxn: 0001-0019-00000142
ksqgtl: RETURNS 0
ksqgtl *** TX-0004001b-0000f92e mode=6 flags=0x401 timeout=0 ***
ksqgtl: xcb=0x419a6f068, ktcdix=2147483647, topxcb=0x419a6f068
ktcipt(topxcb)=0x0
ksucti: init session DID from txn DID:
ksqgtl:
ksqlkdid: 0001-0019-00000142
*** ksudidTrace: ksqgtl
ktcmydid(): 0001-0019-00000142
ksusesdi: 0001-0019-00000143
ksusetxn: 0001-0019-00000142
ksqgtl: RETURNS 0
ksqrcl: TX,4001b,f92e
ksqrcl: returns 0
enq_suspend.d script can be further extending for more complex conditions: such as stop when session acquires TM lock with particular object_id and mode, and so on.
Script verified in my environment: Oracle Database version 11.2.0.3 on Solaris 10 SPARC64.