Страницы

понедельник, 9 февраля 2015 г.

Suspending a users session on specific enqueue

Recently I faced a strange issue with EBR (Edition-Based Redefinition). In order to understand how things work internally I decided to use DTrace.
I wanted to suspend specific user session when session acquired AE lock. There are similar enq_trace.sh script that written by Tanel Poder. Tanel's script could be extended to accommodate to my specific requirements if needed.
My enq_suspend.d script is below:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/sbin/dtrace -s
 
# pragma D option quiet
# pragma D option destructive
 
struct ksqrs_t {
  long     addr;
  char     b1[72];
  uint32_t id1;
  uint32_t id2;
  char     b2[4];
  char     idt[2];/*92=offset of X$KSQRS.KSQRSIDT column from x$kqfco*/
  char     b3[18];
};
 
struct ksqrs_t ksqrs;
 
pid$target:oracle:ksqgtl*:entry
{
  ksqeq_lkadr = arg0; /* X$KSQEQ.KSQLKADR*/
  mode = arg1;
  timeout = arg3;
  lock_indx = arg4; /*X$KSIRESTYP.INDX*/
  flags = arg7;
}
 
pid$target:oracle:ksqgtl*:return
{
  ksqeq_lkres = *(long *)copyin(ksqeq_lkadr+8,8);
  ksqrs = *(struct ksqrs_t *)copyin(ksqeq_lkres,112);
  ksqrs.addr = ksqeq_lkres;
  printf("%d [%Y] %s: *** %s-%08x-%08x mode=%d flags=0x%x timeout=%d\n",
    timestamp,
    walltimestamp,
    probefunc,
    ksqrs.idt,
    ksqrs.id1,
    ksqrs.id2,
    mode,
    flags,
    timeout
  );
}
 
pid$target:oracle:ksqrcl*:entry
{
  enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
  ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
  ksqrs.addr = enqrs_addr;
  printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
    timestamp,
    walltimestamp,
    probefunc,
    ksqrs.idt,
    ksqrs.id1,
    ksqrs.id2,
    ksqrs.addr
  );
}
 
pid$target:oracle:ksqrcl*:entry
/ksqrs.idt==$$1/
{
  printf("%d [%Y] %s.%s: lock_type=%s suspending execution\n", timestamp, walltimestamp, probefunc, probename, ksqrs.idt);
  stop();
}
 
pid$target:oracle:ksqrcl*:return {}
A brief description of the script.
  • 1
    2
    # pragma D option quiet
    # pragma D option destructive
    Minimize output, allow destructive actions. We will use "stop()" call further.
  • struct ksqrs_t
    1
    2
    3
    4
    5
    6
    7
    8
    9
    struct ksqrs_t {
      long     addr;
      char     b1[72];/*unknown not needed*/
      uint32_t id1;/*lock ID1*/
      uint32_t id2;/*lock ID2*/
      char     b2[4];/*unknown not needed*/
      char     idt[2];/*lock type, 92=offset of X$KSQRS.KSQRSIDT column from x$kqfco*/
      char     b3[18];/*unknown not needed*/
    };
    The above structure describes X$KSQRS entry (Kernel Services enQueue ReSource):
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    SQL> select c.kqfconam column_name,
      2         c.kqfcodty datatype,
      3         c.kqfcosiz size_byte,
      4         c.kqfcooff offset
      5    from x$kqfta t,
      6         x$kqfco c
      7   where t.kqftanam = 'X$KSQRS'
      8     and c.kqfcotab = t.indx
      9   order by c.indx
     10  /
     
    COLUMN_NAME   DATATYPE  SIZE_BYTE     OFFSET
    ----------- ---------- ---------- ----------
    ADDR                23          8          0
    INDX                 2          4          0
    INST_ID              2          4          0
    KSQRSID1             2          4         80
    KSQRSID2             2          4         84
    KSQRSIDT             1          2         92
    KSQRSFLG             2          1        111
    ID1,ID2,IDT columns maps to relevant GV$LOCK columns, this can be obtained from V$FIXED_VIEW_DEFINITION:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    select s.inst_id,
           l.laddr,
           l.kaddr,
           s.ksusenum,
           r.ksqrsidt,
           r.ksqrsid1,
           r.ksqrsid2,
           l.lmode,
           l.request,
           l.ctime,
           decode(l.lmode,0,0,l.block) 
      from v$_lock l,
           x$ksuse s,
           x$ksqrs r 
     where l.saddr=s.addr
       and concat(USERENV('Instance'),l.raddr)=concat(r.inst_id,r.addr)
  • ksqgtl.entry - get lock function, entry point
    1
    2
    3
    4
    5
    6
    7
    8
    pid$target:oracle:ksqgtl*:entry
    {
      ksqeq_lkadr = arg0; /* X$KSQEQ.KSQLKADR*/
      mode = arg1;
      timeout = arg3;
      lock_indx = arg4; /*X$KSIRESTYP.INDX*/
      flags = arg7;
    }
    Interesting ksqgtl function arguments saved for further usage. In particularly, arg0 - is X$KSQEQ.KSQLKADR (lock address?).
  • ksqgtl.return - get lock function, return
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    pid$target:oracle:ksqgtl*:return
    {
      ksqeq_lkres = *(long *)copyin(ksqeq_lkadr+8,8);
      ksqrs = *(struct ksqrs_t *)copyin(ksqeq_lkres,112);
      ksqrs.addr = ksqeq_lkres;
      printf("%d [%Y] %s: *** %s-%08x-%08x mode=%d flags=0x%x timeout=%d\n",
        timestamp,
        walltimestamp,
        probefunc,
        ksqrs.idt,
        ksqrs.id1,
        ksqrs.id2,
        mode,
        flags,
        timeout
      );
    }
    We will interesting in "ID1", "ID2", "TYPE" lock attributes. We can fully decode them from X$KSQRS fixed table.
    But ksqgtl called with X$KSQEQ.KSQLKADR. X$KSQEQ.KSQLKRES maps to X$KSQRS.ADDR.
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    SQL> select c.kqfconam column_name,
      2         c.kqfcodty datatype,
      3         c.kqfcosiz size_byte,
      4         c.kqfcooff offset
      5    from x$kqfta t,
      6         x$kqfco c
      7   where t.kqftanam = 'X$KSQEQ'
      8     and c.kqfcotab = t.indx
      9   order by c.indx
     10  /
     
    COLUMN_NAME   DATATYPE  SIZE_BYTE     OFFSET
    ----------- ---------- ---------- ----------
    ADDR                23          8          0
    INDX                 2          4          0
    INST_ID              2          4          0
    KSSOBFLG             2          4          0
    KSSOBOWN            23          8          0
    KSQLKADR            23          8         88
    KSQLKRES            23          8         96
    KSQLKMOD             2          1        176
    KSQLKREQ             2          1        177
    KSQLKMXH             2          2        178
    KSQLKSES            23          8          0
    KSQLKCTIM            2          4          0
    KSQLKLBLK            2          4          0
    We will print debug output similar to Tanel Poder script and 10704 event format.
  • 1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    pid$target:oracle:ksqrcl*:entry
    {
      enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
      ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
      ksqrs.addr = enqrs_addr;
      printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
        timestamp,
        walltimestamp,
        probefunc,
        ksqrs.idt,
        ksqrs.id1,
        ksqrs.id2,
        ksqrs.addr
      );
    }
  • ksqrcl function (release lock) entry point:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    pid$target:oracle:ksqrcl*:entry
    {
      enqrs_addr = *(long *)copyin(arg0+8,8); /*X$KSQRS.ADDR enQueue Resource*/
      ksqrs = *(struct ksqrs_t *)copyin(enqrs_addr, 112);
      ksqrs.addr = enqrs_addr;
      printf("%d [%Y] %s: *** %s-%08x-%08x x$ksqrs.addr=0x%016x\n",
        timestamp,
        walltimestamp,
        probefunc,
        ksqrs.idt,
        ksqrs.id1,
        ksqrs.id2,
        ksqrs.addr
      );
    }
    We will populate a ksqrs struct again.
  • If lock type equals to parameter passed to script, we will suspend process execution:
    1
    2
    3
    4
    5
    6
    pid$target:oracle:ksqrcl*:entry
    /ksqrs.idt==$$1/
    {
      printf("%d [%Y] %s.%s: lock_type=%s suspending execution\n", timestamp, walltimestamp, probefunc, probename, ksqrs.idt);
      stop();
    }
Short demonstration how this works.
Suppose I will want suspend server process when session acquires TM lock. 1. I opened new session to the DB and determine server process ID and tracefile location:
1
2
3
4
5
6
7
8
9
10
11
SQL> select p.spid,
  2         p.tracefile
  3    from v$session s,
  4         v$process p
  5   where s.sid = sys_context( 'userenv', 'sid')
  6     and p.addr = s.paddr
  7  /
 
SPID  TRACEFILE
----- ----------------------------------------------------------------------------------------------------
1968  /pub/home/oracle/diag/rdbms/orcl/orcl/trace/orcl_ora_1968.trc
2. I created test table:
1
2
3
4
5
6
7
8
9
10
11
SQL> create table t (x int);
 
Table created.
SQL> select object_id,
  2         to_char(object_id, 'fm0xxxxxxx') object_id_hex
  3    from obj
  4   where object_name='T';
 
 OBJECT_ID OBJECT_ID_HEX
---------- ---------------------------
    349673 000555e9
3. Now I enable additional diagnostics: event 10704 to trace enqueues:
1
2
3
SQL> alter session set events '10704 level 2';
 
Session altered.
4. Next I will run DTrace script enq_suspend.d passing server process ID as parameter:
1
oracle@localhost dtrace$ ./enq_suspend.d -p 1968 TM
5. Trying to truncate table T:
1
 
Session suspending. 6. Observed output into console with DTrace script:
1
2
3
4
5
6
7
oracle@localhost dtrace$ ./enq_suspend.d -p 1968 TM
33969273235645237 [2015 Feb  9 12:42:21] ksqgtlctx: *** TM-000555e9-00000000 mode=6 flags=0x401 timeout=0
33969273240887878 [2015 Feb  9 12:42:21] ksqgtlctx: *** TX-0004001b-0000f92e mode=6 flags=0x401 timeout=0
33969273245689382 [2015 Feb  9 12:42:21] ksqrcl: *** TX-0004001b-0000f92e x$ksqrs.addr=0x000000041cda5c90
33969273245764458 [2015 Feb  9 12:42:21] ksqrcli: *** TX-0004001b-0000f92e x$ksqrs.addr=0x000000041cda5c90
33969273246279405 [2015 Feb  9 12:42:21] ksqrcl: *** TM-000555e9-00000000 x$ksqrs.addr=0x000000041cda25d0
33969273246301204 [2015 Feb  9 12:42:21] ksqrcl.entry: lock_type=TM suspending execution
You can see that when calling ksqrcl with TM-000555e9-00000000 (our object id) execution is suspending. We can gather any required diagnostic data at this step. 7. 10704 event data in the trace file:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
*** 2015-02-09 12:42:21.102
ksqgtl *** TM-000555e9-00000000 mode=6 flags=0x401 timeout=0 ***
ksqgtl: xcb=0x419a6f068, ktcdix=2147483647, topxcb=0x419a6f068
        ktcipt(topxcb)=0x0
ksucti: init txn DID from session DID
ksqgtl:
        ksqlkdid: 0001-0019-00000142
*** ksudidTrace: ksqgtl
        ktcmydid(): 0001-0019-00000142
        ksusesdi:   0001-0019-00000143
        ksusetxn:   0001-0019-00000142
ksqgtl: RETURNS 0
ksqgtl *** TX-0004001b-0000f92e mode=6 flags=0x401 timeout=0 ***
ksqgtl: xcb=0x419a6f068, ktcdix=2147483647, topxcb=0x419a6f068
        ktcipt(topxcb)=0x0
ksucti: init session DID from txn DID:
ksqgtl:
        ksqlkdid: 0001-0019-00000142
*** ksudidTrace: ksqgtl
        ktcmydid(): 0001-0019-00000142
        ksusesdi:   0001-0019-00000143
        ksusetxn:   0001-0019-00000142
ksqgtl: RETURNS 0
ksqrcl: TX,4001b,f92e
ksqrcl: returns 0
enq_suspend.d script can be further extending for more complex conditions: such as stop when session acquires TM lock with particular object_id and mode, and so on. Script verified in my environment: Oracle Database version 11.2.0.3 on Solaris 10 SPARC64.

Комментариев нет:

Отправить комментарий

Примечание. Отправлять комментарии могут только участники этого блога.