Страницы

понедельник, 9 февраля 2015 г.

Suspending a users session on specific enqueue

Recently I faced a strange issue with EBR (Edition-Based Redefinition). In order to understand how things work internally I decided to use DTrace.
I wanted to suspend specific user session when session acquired AE lock. There are similar enq_trace.sh script that written by Tanel Poder. Tanel's script could be extended to accommodate to my specific requirements if needed.
My enq_suspend.d script is below:
#!/usr/sbin/dtrace -s

# pragma D option quiet
# pragma D option destructive

struct ksqrs_t {
  long     addr;
  char     b1[72];
  uint32_t id1;
  uint32_t id2;
  char     b2[4];
  char     idt[2];/*92=offset of X$KSQRS.KSQRSIDT column from x$kqfco*/
  char     b3[18];
};

struct ksqrs_t ksqrs;

pid$target:oracle:ksqgtl*:entry 
{ 
  ksqeq_lkadr = arg0; /* X$KSQEQ.KSQLKADR*/
  mode = arg1;
  timeout = arg3;
  lock_indx = arg4; /*X$KSIRESTYP.INDX*/
  flags = arg7;
} 

pid$target:oracle:ksqgtl*:return 
{
  ksqeq_lkres = *(long *)copyin(ksqeq_lkadr+8,8);
  ksqrs = *(struct ksqrs_t *)copyin(ksqeq_lkres,112);
  ksqrs.addr = ksqeq_lkres;
  printf("%d [%Y] %s: *** %s-%08x-%08x mode=%d flags=0x%x timeout=%d\n",
    timestamp,
    walltimestamp,
    probefunc,
    ksqrs.idt,
    ksqrs.id1,
    ksqrs.id2,
    mode, 
    flags, 
    timeout
  );
}

pid$target:oracle:ksqrcl*:entry 
{ 
  enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
  ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
  ksqrs.addr = enqrs_addr;
  printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
    timestamp,
    walltimestamp,
    probefunc,
    ksqrs.idt,
    ksqrs.id1,
    ksqrs.id2,
    ksqrs.addr
  );
} 

pid$target:oracle:ksqrcl*:entry 
/ksqrs.idt==$$1/
{
  printf("%d [%Y] %s.%s: lock_type=%s suspending execution\n", timestamp, walltimestamp, probefunc, probename, ksqrs.idt);
  stop();
}

pid$target:oracle:ksqrcl*:return {}
A brief description of the script.
  • # pragma D option quiet
    # pragma D option destructive
        
    Minimize output, allow destructive actions. We will use "stop()" call further.
  • struct ksqrs_t
    struct ksqrs_t {
      long     addr;
      char     b1[72];/*unknown not needed*/
      uint32_t id1;/*lock ID1*/
      uint32_t id2;/*lock ID2*/
      char     b2[4];/*unknown not needed*/
      char     idt[2];/*lock type, 92=offset of X$KSQRS.KSQRSIDT column from x$kqfco*/
      char     b3[18];/*unknown not needed*/
    };
        
    The above structure describes X$KSQRS entry (Kernel Services enQueue ReSource):
    SQL> select c.kqfconam column_name,
      2         c.kqfcodty datatype,
      3         c.kqfcosiz size_byte,
      4         c.kqfcooff offset
      5    from x$kqfta t,
      6         x$kqfco c
      7   where t.kqftanam = 'X$KSQRS'
      8     and c.kqfcotab = t.indx
      9   order by c.indx
     10  /
    
    COLUMN_NAME   DATATYPE  SIZE_BYTE     OFFSET
    ----------- ---------- ---------- ----------
    ADDR                23          8          0
    INDX                 2          4          0
    INST_ID              2          4          0
    KSQRSID1             2          4         80
    KSQRSID2             2          4         84
    KSQRSIDT             1          2         92
    KSQRSFLG             2          1        111
    
    ID1,ID2,IDT columns maps to relevant GV$LOCK columns, this can be obtained from V$FIXED_VIEW_DEFINITION:
    select s.inst_id, 
           l.laddr,
           l.kaddr,
           s.ksusenum,
           r.ksqrsidt,
           r.ksqrsid1, 
           r.ksqrsid2,
           l.lmode,
           l.request,
           l.ctime,
           decode(l.lmode,0,0,l.block)  
      from v$_lock l, 
           x$ksuse s, 
           x$ksqrs r  
     where l.saddr=s.addr 
       and concat(USERENV('Instance'),l.raddr)=concat(r.inst_id,r.addr)
    
  • ksqgtl.entry - get lock function, entry point
    pid$target:oracle:ksqgtl*:entry 
    { 
      ksqeq_lkadr = arg0; /* X$KSQEQ.KSQLKADR*/
      mode = arg1;
      timeout = arg3;
      lock_indx = arg4; /*X$KSIRESTYP.INDX*/
      flags = arg7;
    } 
    
    Interesting ksqgtl function arguments saved for further usage. In particularly, arg0 - is X$KSQEQ.KSQLKADR (lock address?).
  • ksqgtl.return - get lock function, return
    pid$target:oracle:ksqgtl*:return 
    {
      ksqeq_lkres = *(long *)copyin(ksqeq_lkadr+8,8);
      ksqrs = *(struct ksqrs_t *)copyin(ksqeq_lkres,112);
      ksqrs.addr = ksqeq_lkres;
      printf("%d [%Y] %s: *** %s-%08x-%08x mode=%d flags=0x%x timeout=%d\n",
        timestamp,
        walltimestamp,
        probefunc,
        ksqrs.idt,
        ksqrs.id1,
        ksqrs.id2,
        mode, 
        flags, 
        timeout
      );
    }
    
    We will interesting in "ID1", "ID2", "TYPE" lock attributes. We can fully decode them from X$KSQRS fixed table.
    But ksqgtl called with X$KSQEQ.KSQLKADR. X$KSQEQ.KSQLKRES maps to X$KSQRS.ADDR.
    SQL> select c.kqfconam column_name,
      2         c.kqfcodty datatype,
      3         c.kqfcosiz size_byte,
      4         c.kqfcooff offset
      5    from x$kqfta t,
      6         x$kqfco c
      7   where t.kqftanam = 'X$KSQEQ'
      8     and c.kqfcotab = t.indx
      9   order by c.indx
     10  /
    
    COLUMN_NAME   DATATYPE  SIZE_BYTE     OFFSET
    ----------- ---------- ---------- ----------
    ADDR                23          8          0
    INDX                 2          4          0
    INST_ID              2          4          0
    KSSOBFLG             2          4          0
    KSSOBOWN            23          8          0
    KSQLKADR            23          8         88
    KSQLKRES            23          8         96
    KSQLKMOD             2          1        176
    KSQLKREQ             2          1        177
    KSQLKMXH             2          2        178
    KSQLKSES            23          8          0
    KSQLKCTIM            2          4          0
    KSQLKLBLK            2          4          0
    
    We will print debug output similar to Tanel Poder script and 10704 event format.
  • pid$target:oracle:ksqrcl*:entry 
    { 
      enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
      ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
      ksqrs.addr = enqrs_addr;
      printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
        timestamp,
        walltimestamp,
        probefunc,
        ksqrs.idt,
        ksqrs.id1,
        ksqrs.id2,
        ksqrs.addr
      );
    } 
    
  • ksqrcl function (release lock) entry point:
    pid$target:oracle:ksqrcl*:entry 
    { 
      enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
      ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
      ksqrs.addr = enqrs_addr;
      printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
        timestamp,
        walltimestamp,
        probefunc,
        ksqrs.idt,
        ksqrs.id1,
        ksqrs.id2,
        ksqrs.addr
      );
    } 
    
    We will populate a ksqrs struct again.
  • If lock type equals to parameter passed to script, we will suspend process execution:
    pid$target:oracle:ksqrcl*:entry 
    /ksqrs.idt==$$1/
    {
      printf("%d [%Y] %s.%s: lock_type=%s suspending execution\n", timestamp, walltimestamp, probefunc, probename, ksqrs.idt);
      stop();
    }
    
Short demonstration how this works.
Suppose I will want suspend server process when session acquires TM lock. 1. I opened new session to the DB and determine server process ID and tracefile location:
SQL> select p.spid,
  2         p.tracefile
  3    from v$session s,
  4         v$process p
  5   where s.sid = sys_context( 'userenv', 'sid')
  6     and p.addr = s.paddr
  7  /

SPID  TRACEFILE
----- ----------------------------------------------------------------------------------------------------
1968  /pub/home/oracle/diag/rdbms/orcl/orcl/trace/orcl_ora_1968.trc
2. I created test table:
SQL> create table t (x int);

Table created.
SQL> select object_id,
  2         to_char(object_id, 'fm0xxxxxxx') object_id_hex
  3    from obj
  4   where object_name='T';

 OBJECT_ID OBJECT_ID_HEX
---------- ---------------------------
    349673 000555e9
3. Now I enable additional diagnostics: event 10704 to trace enqueues:
SQL> alter session set events '10704 level 2';

Session altered.
4. Next I will run DTrace script enq_suspend.d passing server process ID as parameter:
oracle@localhost dtrace$ ./enq_suspend.d -p 1968 TM
5. Trying to truncate table T:

Session suspending.
6. Observed output into console with DTrace script:
oracle@localhost dtrace$ ./enq_suspend.d -p 1968 TM
33969273235645237 [2015 Feb  9 12:42:21] ksqgtlctx: *** TM-000555e9-00000000 mode=6 flags=0x401 timeout=0
33969273240887878 [2015 Feb  9 12:42:21] ksqgtlctx: *** TX-0004001b-0000f92e mode=6 flags=0x401 timeout=0
33969273245689382 [2015 Feb  9 12:42:21] ksqrcl: *** TX-0004001b-0000f92e x$ksqrs.addr=0x000000041cda5c90
33969273245764458 [2015 Feb  9 12:42:21] ksqrcli: *** TX-0004001b-0000f92e x$ksqrs.addr=0x000000041cda5c90
33969273246279405 [2015 Feb  9 12:42:21] ksqrcl: *** TM-000555e9-00000000 x$ksqrs.addr=0x000000041cda25d0
33969273246301204 [2015 Feb  9 12:42:21] ksqrcl.entry: lock_type=TM suspending execution
You can see that when calling ksqrcl with TM-000555e9-00000000 (our object id) execution is suspending. We can gather any required diagnostic data at this step. 7. 10704 event data in the trace file:
*** 2015-02-09 12:42:21.102
ksqgtl *** TM-000555e9-00000000 mode=6 flags=0x401 timeout=0 ***
ksqgtl: xcb=0x419a6f068, ktcdix=2147483647, topxcb=0x419a6f068
        ktcipt(topxcb)=0x0
ksucti: init txn DID from session DID 
ksqgtl:
        ksqlkdid: 0001-0019-00000142
*** ksudidTrace: ksqgtl
        ktcmydid(): 0001-0019-00000142
        ksusesdi:   0001-0019-00000143
        ksusetxn:   0001-0019-00000142
ksqgtl: RETURNS 0
ksqgtl *** TX-0004001b-0000f92e mode=6 flags=0x401 timeout=0 ***
ksqgtl: xcb=0x419a6f068, ktcdix=2147483647, topxcb=0x419a6f068
        ktcipt(topxcb)=0x0
ksucti: init session DID from txn DID: 
ksqgtl:
        ksqlkdid: 0001-0019-00000142
*** ksudidTrace: ksqgtl
        ktcmydid(): 0001-0019-00000142
        ksusesdi:   0001-0019-00000143
        ksusetxn:   0001-0019-00000142
ksqgtl: RETURNS 0
ksqrcl: TX,4001b,f92e
ksqrcl: returns 0
enq_suspend.d script can be further extending for more complex conditions: such as stop when session acquires TM lock with particular object_id and mode, and so on. Script verified in my environment: Oracle Database version 11.2.0.3 on Solaris 10 SPARC64.

Комментариев нет:

Отправить комментарий