Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /* (c) M.L. Kersten
14 : * Performance tracing
15 : * The stethoscope/tachograph and tomograph performance monitors have exclusive
16 : * access to a single event stream, which avoids concurrency conflicts amongst
17 : * clients.
18 : * It also avoid cluthered event records on the stream. Since this event stream
19 : * is owned by a client, we should ensure that the profiler is automatically
20 : * reset once the owner leaves.
21 : */
22 : #include "monetdb_config.h"
23 : #include "mutils.h" /* mercurial_revision */
24 : #include "msabaoth.h" /* msab_getUUID */
25 : #include "mal_authorize.h"
26 : #include "mal_function.h"
27 : #include "mal_listing.h"
28 : #include "mal_profiler.h"
29 : #include "mal_runtime.h"
30 : #include "mal_utils.h"
31 : #include "mal_resource.h"
32 : #include "mal_internal.h"
33 :
34 : #ifdef HAVE_SYS_TIME_H
35 : #include <sys/time.h>
36 : #endif
37 :
38 : #include <string.h>
39 :
40 : static const char *myname = 0; // avoid tracing the profiler module
41 :
42 : /* The JSON rendering can be either using '\n' separators between
43 : * each key:value pair or as a single line.
44 : * The current stethoscope implementation requires the first option and
45 : * also the term rendering to be set to ''
46 : */
47 :
48 : int profilerStatus = 0; /* global flag profiler status */
49 : int profilerMode = 0; /* global flag profiler mode, minimal or detailed */
50 : static oid profilerUser; /* keep track on who has claimed the channel */
51 :
52 : static struct timeval startup_time;
53 :
54 : static ATOMIC_TYPE hbdelay = ATOMIC_VAR_INIT(0);
55 :
56 : #ifdef HAVE_SYS_RESOURCE_H
57 : #include <sys/resource.h>
58 : struct rusage infoUsage;
59 : static struct rusage prevUsage;
60 : #endif
61 :
62 : #define LOGLEN 8192
63 :
64 : static void
65 2 : logjsonInternal(char *logbuffer, bool flush)
66 : {
67 2 : size_t len;
68 2 : len = strlen(logbuffer);
69 :
70 2 : (void) mnstr_write(maleventstream, logbuffer, 1, len);
71 2 : if (flush)
72 1 : (void) mnstr_flush(maleventstream, MNSTR_FLUSH_DATA);
73 2 : }
74 :
75 : /*
76 : * We use a buffer (`logbuffer`) where we incrementally create the output JSON
77 : * object. Initially we allocate LOGLEN (8K)
78 : * bytes and we keep the capacity of the buffer (`logcap`) and the length of the
79 : * current string (`loglen`).
80 : *
81 : * We use the `logadd` function to add data to our buffer (usually key-value
82 : * pairs). This macro offers an interface similar to printf.
83 : *
84 : * The first snprintf below happens in a statically allocated buffer that might
85 : * be much smaller than logcap. We do not care. We only need to perform this
86 : * snprintf to get the actual length of the string that is to be produced.
87 : *
88 : * There are three cases:
89 : *
90 : * 1. The new string fits in the current buffer -> we just update the buffer
91 : *
92 : * 2. The new string does not fit in the current buffer, but is smaller than the
93 : * capacity of the buffer -> we output the current contents of the buffer and
94 : * start at the beginning.
95 : *
96 : * 3. The new string exceeds the current capacity of the buffer -> we output the
97 : * current contents and reallocate the buffer. The new capacity is 1.5 times the
98 : * length of the new string.
99 : */
100 : struct logbuf {
101 : char *logbuffer;
102 : char *logbase;
103 : size_t loglen;
104 : size_t logcap;
105 : };
106 :
107 : static inline void
108 844 : lognew(struct logbuf *logbuf)
109 : {
110 844 : logbuf->loglen = 0;
111 844 : logbuf->logbase = logbuf->logbuffer;
112 844 : *logbuf->logbase = 0;
113 844 : }
114 :
115 : static inline void
116 0 : logdel(struct logbuf *logbuf)
117 : {
118 0 : GDKfree(logbuf->logbuffer);
119 0 : logbuf->logbuffer = NULL;
120 0 : }
121 :
122 : static bool logadd(struct logbuf *logbuf,
123 : _In_z_ _Printf_format_string_ const char *fmt, ...)
124 : __attribute__((__format__(__printf__, 2, 3)))
125 : __attribute__((__warn_unused_result__));
126 : static bool
127 33013 : logadd(struct logbuf *logbuf, const char *fmt, ...)
128 : {
129 33013 : char tmp_buff[LOGLEN];
130 33013 : int tmp_len;
131 33013 : va_list va;
132 :
133 33013 : va_start(va, fmt);
134 33013 : tmp_len = vsnprintf(tmp_buff, sizeof(tmp_buff), fmt, va);
135 33013 : va_end(va);
136 33013 : if (tmp_len < 0) {
137 0 : logdel(logbuf);
138 0 : return false;
139 : }
140 33013 : if (logbuf->loglen + (size_t) tmp_len >= logbuf->logcap) {
141 844 : if ((size_t) tmp_len >= logbuf->logcap) {
142 : /* includes first time when logbuffer == NULL and logcap == 0 */
143 843 : char *alloc_buff;
144 843 : if (logbuf->loglen > 0)
145 0 : logjsonInternal(logbuf->logbuffer, false);
146 843 : logbuf->logcap = (size_t) tmp_len + (size_t) tmp_len / 2;
147 843 : if (logbuf->logcap < LOGLEN)
148 843 : logbuf->logcap = LOGLEN;
149 843 : alloc_buff = GDKrealloc(logbuf->logbuffer, logbuf->logcap);
150 843 : if (alloc_buff == NULL) {
151 0 : TRC_ERROR(MAL_SERVER,
152 : "Profiler JSON buffer reallocation failure\n");
153 0 : logdel(logbuf);
154 0 : return false;
155 : }
156 843 : logbuf->logbuffer = alloc_buff;
157 843 : lognew(logbuf);
158 : } else {
159 1 : logjsonInternal(logbuf->logbuffer, false);
160 1 : lognew(logbuf);
161 : }
162 : }
163 33013 : if (tmp_len > 0) {
164 33014 : va_start(va, fmt);
165 33014 : logbuf->loglen += vsnprintf(logbuf->logbase + logbuf->loglen,
166 33014 : logbuf->logcap - logbuf->loglen, fmt, va);
167 33014 : va_end(va);
168 : }
169 : return true;
170 : }
171 :
172 : static str phase_descriptions[] = {
173 : [CLIENT_START] = "session_start",
174 : [CLIENT_END] = "session_end",
175 : [TEXT_TO_SQL] = "text_to_sql",
176 : [SQL_TO_REL] = "sql_to_rel",
177 : [REL_OPT] = "rel_opt",
178 : [REL_TO_MAL] = "rel_to_mal",
179 : [MAL_OPT] = "mal_opt",
180 : [MAL_ENGINE] = "mal_engine",
181 : [COMMIT] = "trans_commit",
182 : [ROLLBACK] = "trans_rollback",
183 : [CONFLICT] = "trans_conflict"
184 : };
185 :
186 : static str
187 0 : prepareNonMalEvent(Client cntxt, enum event_phase phase, ulng clk, ulng *tstart,
188 : ulng *tend, int state, ulng duration)
189 : {
190 0 : oid *tag = NULL;
191 0 : str query = NULL;
192 0 : struct logbuf logbuf = { 0 };
193 :
194 0 : uint64_t mclk = (uint64_t) clk -
195 0 : ((uint64_t) startup_time.tv_sec * 1000000 -
196 0 : (uint64_t) startup_time.tv_usec);
197 :
198 0 : assert(cntxt);
199 0 : int sessionid = cntxt->idx;
200 0 : if (cntxt->curprg)
201 0 : tag = &cntxt->curprg->def->tag;
202 0 : if (cntxt->query
203 0 : && (query = mal_quote(cntxt->query, strlen(cntxt->query))) == NULL)
204 : return NULL;
205 :
206 0 : if (!logadd(&logbuf, "{\"sessionid\":\"%d\"", sessionid))
207 0 : goto cleanup_and_exit;
208 0 : if (!logadd(&logbuf, ", \"clk\":" ULLFMT "", mclk))
209 0 : goto cleanup_and_exit;
210 0 : if (!logadd(&logbuf, ", \"thread\":%zu, \"phase\":\"%s\"",
211 : MT_getpid(), phase_descriptions[phase]))
212 0 : goto cleanup_and_exit;
213 0 : if (tstart && !logadd(&logbuf, ", \"tstart\":" ULLFMT, *tstart))
214 0 : goto cleanup_and_exit;
215 0 : if (tend && !logadd(&logbuf, ", \"tend\":" ULLFMT, *tend))
216 0 : goto cleanup_and_exit;
217 0 : if (tag && !logadd(&logbuf, ", \"tag\":" OIDFMT, *tag))
218 0 : goto cleanup_and_exit;
219 0 : if (query && phase == TEXT_TO_SQL
220 0 : && !logadd(&logbuf, ", \"query\":\"%s\"", query))
221 0 : goto cleanup_and_exit;
222 0 : if (state != 0 && !logadd(&logbuf, ", \"state\":\"error\""))
223 0 : goto cleanup_and_exit;
224 0 : if (!logadd(&logbuf, ", \"usec\":" ULLFMT "}\n", duration))
225 0 : goto cleanup_and_exit;
226 0 : GDKfree(query);
227 0 : return logbuf.logbuffer;
228 0 : cleanup_and_exit:
229 0 : GDKfree(query);
230 0 : logdel(&logbuf);
231 0 : return NULL;
232 : }
233 :
234 : static inline str
235 1673 : format_val2json(const ValPtr res)
236 : {
237 1673 : char *buf = NULL;
238 1673 : size_t sz = 0;
239 :
240 3346 : if (BATatoms[res->vtype].atomNull &&
241 1673 : (!VALget(res)
242 1673 : || BATatoms[res->vtype].atomCmp(VALget(res),
243 : BATatoms[res->vtype].atomNull) == 0))
244 75 : return GDKstrdup("\"nil\"");
245 :
246 1597 : bool use_external = true;
247 :
248 1597 : switch (res->vtype) {
249 413 : case TYPE_bte:
250 : case TYPE_sht:
251 : case TYPE_int:
252 : case TYPE_flt:
253 : case TYPE_dbl:
254 : case TYPE_lng:
255 : #ifdef HAVE_HGE
256 : case TYPE_hge:
257 : #endif
258 413 : use_external = false;
259 : }
260 :
261 1597 : if ((*BATatoms[res->vtype].atomToStr) (&buf, &sz, VALptr(res),
262 : use_external) < 0)
263 : return NULL;
264 :
265 1598 : if (!use_external || res->vtype == TYPE_str)
266 757 : return buf;
267 :
268 841 : ValRecord val;
269 841 : if (VALinit(&val, TYPE_str, buf) == NULL) {
270 0 : GDKfree(buf);
271 0 : return NULL;
272 : }
273 :
274 841 : GDKfree(buf);
275 :
276 841 : char *buf2;
277 841 : buf2 = VALformat(&val);
278 841 : VALclear(&val);
279 :
280 841 : return buf2;
281 : }
282 :
283 : static str
284 843 : prepareMalEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
285 : lng clk, lng ticks)
286 : {
287 843 : struct logbuf logbuf;
288 843 : str c;
289 843 : uint64_t mclk;
290 843 : bool ok;
291 843 : const char *algo = MT_thread_getalgorithm();
292 :
293 : /* The stream of events can be complete read by the DBA,
294 : * all other users can only see events assigned to their account
295 : */
296 843 : if (profilerUser != MAL_ADMIN && profilerUser != cntxt->user)
297 : return NULL;
298 :
299 843 : logbuf = (struct logbuf) { 0 };
300 :
301 843 : mclk = (uint64_t) clk - ((uint64_t) startup_time.tv_sec * 1000000 -
302 843 : (uint64_t) startup_time.tv_usec);
303 : /* make profile event tuple */
304 1686 : if (!logadd(&logbuf, "{" // fill in later with the event counter
305 : "\"sessionid\":\"%d\""
306 : ",\"clk\":%" PRIu64 ""
307 : ",\"thread\":%zu"
308 : ",\"phase\":\"%s\""
309 : ",\"pc\":%d"
310 : ",\"tag\":" OIDFMT,
311 : cntxt->idx,
312 : mclk,
313 : MT_getpid(),
314 : phase_descriptions[MAL_ENGINE],
315 842 : mb ? getPC(mb, pci) : 0, stk ? stk->tag : 0))
316 0 : goto cleanup_and_exit;
317 843 : if (pci->modname
318 807 : && !logadd(&logbuf, ",\"module\":\"%s\"",
319 : pci->modname ? pci->modname : ""))
320 0 : goto cleanup_and_exit;
321 843 : if (pci->fcnname
322 807 : && !logadd(&logbuf, ",\"function\":\"%s\"",
323 : pci->fcnname ? pci->fcnname : ""))
324 0 : goto cleanup_and_exit;
325 843 : if (pci->barrier
326 4 : && !logadd(&logbuf, ",\"barrier\":\"%s\"", operatorName(pci->barrier)))
327 0 : goto cleanup_and_exit;
328 915 : if ((pci->token < FCNcall || pci->token > PATcall) &&
329 72 : !logadd(&logbuf, ",\"operator\":\"%s\"", operatorName(pci->token)))
330 0 : goto cleanup_and_exit;
331 843 : if (!logadd(&logbuf, ",\"usec\":" LLFMT, ticks))
332 0 : goto cleanup_and_exit;
333 843 : if (algo && !logadd(&logbuf, ",\"algorithm\":\"%s\"", algo))
334 0 : goto cleanup_and_exit;
335 843 : if (mb && pci->modname && pci->fcnname) {
336 807 : int j;
337 :
338 807 : if (profilerMode == 0 && stk) {
339 807 : if (!logadd(&logbuf, ",\"args\":["))
340 0 : goto cleanup_and_exit;
341 807 : char name[IDLENGTH] = { 0 };
342 4143 : for (j = 0; j < pci->argc; j++) {
343 3336 : int tpe = getVarType(mb, getArg(pci, j));
344 3336 : str tname = 0, cv;
345 3336 : lng total = 0;
346 3336 : BUN cnt = 0;
347 3336 : bat bid = 0;
348 :
349 3336 : if (j == 0) {
350 : // No comma at the beginning
351 807 : if (!logadd(&logbuf, "{"))
352 0 : goto cleanup_and_exit;
353 : } else {
354 2529 : if (!logadd(&logbuf, ",{"))
355 0 : goto cleanup_and_exit;
356 : }
357 6672 : if (!logadd(&logbuf, "\"%s\":%d,\"var\":\"%s\"",
358 3336 : j < pci->retc ? "ret" : "arg", j,
359 : getVarNameIntoBuffer(mb, getArg(pci, j), name)))
360 0 : goto cleanup_and_exit;
361 : //c = getVarName(mb, getArg(pci, j), name);
362 3336 : if (getVarSTC(mb, getArg(pci, j))) {
363 0 : InstrPtr stc = getInstrPtr(mb, getVarSTC(mb, getArg(pci, j)));
364 0 : if (stc && getModuleId(stc)
365 0 : && strcmp(getModuleId(stc), "sql") == 0
366 0 : && strncmp(getFunctionId(stc), "bind", 4) == 0
367 0 : && !logadd(&logbuf, ",\"alias\":\"%s.%s.%s\"",
368 0 : getVarConstant(mb,
369 : getArg(stc,
370 : stc->retc +
371 : 1)).val.sval,
372 0 : getVarConstant(mb,
373 : getArg(stc,
374 : stc->retc +
375 : 2)).val.sval,
376 0 : getVarConstant(mb,
377 : getArg(stc,
378 : stc->retc +
379 : 3)).val.sval))
380 0 : goto cleanup_and_exit;
381 : }
382 3336 : if (isaBatType(tpe)) {
383 1663 : BAT *d = BATdescriptor(bid = stk->stk[getArg(pci, j)].val.bval);
384 1662 : tname = getTypeName(getBatType(tpe));
385 1663 : ok = logadd(&logbuf, ",\"type\":\"bat[:%s]\"", tname);
386 1663 : GDKfree(tname);
387 1663 : if (!ok) {
388 0 : BBPreclaim(d);
389 0 : goto cleanup_and_exit;
390 : }
391 1663 : if (d) {
392 1500 : MT_lock_set(&d->theaplock);
393 1498 : BATiter di = bat_iterator_nolock(d);
394 : /* outside the lock we cannot dereference di.h or di.vh,
395 : * but we can use all values without dereference and
396 : * without further locking */
397 1498 : MT_lock_unset(&d->theaplock);
398 1500 : cnt = di.count;
399 1500 : if (VIEWtparent(d)) {
400 176 : BAT *v = BBP_desc(VIEWtparent(d));
401 176 : MT_lock_set(&v->theaplock);
402 176 : bool vtransient = v->batTransient;
403 176 : MT_lock_unset(&v->theaplock);
404 352 : if (!logadd(&logbuf,
405 : ",\"view\":\"true\""
406 : ",\"parent\":%d"
407 : ",\"seqbase\":" BUNFMT
408 : ",\"mode\":\"%s\"",
409 176 : VIEWtparent(d),
410 : d->hseqbase,
411 : vtransient ? "transient" :
412 : "persistent")) {
413 0 : BBPunfix(d->batCacheid);
414 0 : goto cleanup_and_exit;
415 : }
416 : } else {
417 1324 : if (!logadd(&logbuf, ",\"mode\":\"%s\"",
418 1324 : (di.transient ? "transient" : "persistent"))) {
419 0 : BBPunfix(d->batCacheid);
420 0 : goto cleanup_and_exit;
421 : }
422 : }
423 1500 : if (!logadd(&logbuf,
424 : ",\"sorted\":%d"
425 : ",\"revsorted\":%d"
426 : ",\"nonil\":%d"
427 : ",\"nil\":%d"
428 : ",\"key\":%d",
429 1500 : di.sorted,
430 1500 : di.revsorted, di.nonil, di.nil, di.key)) {
431 0 : BBPunfix(d->batCacheid);
432 0 : goto cleanup_and_exit;
433 : }
434 1676 : if ((di.minpos != BUN_NONE &&
435 176 : !logadd(&logbuf, ",\"minpos\":\"" BUNFMT "\"",
436 1500 : di.minpos)) || (di.maxpos != BUN_NONE
437 177 : && !logadd(&logbuf,
438 : ",\"maxpos\":\""
439 : BUNFMT "\"",
440 : di.maxpos))
441 1500 : || (di.unique_est != 0
442 458 : && !logadd(&logbuf, ",\"nestimate\":\"%g\"",
443 : di.unique_est))) {
444 0 : BBPunfix(d->batCacheid);
445 0 : goto cleanup_and_exit;
446 : }
447 :
448 1500 : cv = VALformat(&stk->stk[getArg(pci, j)]);
449 1500 : if (cv) {
450 1500 : c = strchr(cv, '>');
451 1500 : if (c) /* unlikely that this isn't true */
452 0 : *c = 0;
453 1500 : ok = logadd(&logbuf, ",\"file\":\"%s\"", cv + 1);
454 1500 : GDKfree(cv);
455 1500 : if (!ok) {
456 0 : BBPunfix(d->batCacheid);
457 0 : goto cleanup_and_exit;
458 : }
459 : }
460 1500 : total += cnt << di.shift;
461 1500 : if (!logadd(&logbuf, ",\"width\":%d", di.width)) {
462 0 : BBPunfix(d->batCacheid);
463 0 : goto cleanup_and_exit;
464 : }
465 : /* keeping information about the individual auxiliary heaps is helpful during analysis. */
466 1500 : MT_rwlock_rdlock(&d->thashlock);
467 1500 : if (d->thash
468 0 : && !logadd(&logbuf, ",\"hash\":" LLFMT,
469 0 : (lng) hashinfo(d->thash,
470 : d->batCacheid))) {
471 0 : MT_rwlock_rdunlock(&d->thashlock);
472 0 : BBPunfix(d->batCacheid);
473 0 : goto cleanup_and_exit;
474 : }
475 1500 : MT_rwlock_rdunlock(&d->thashlock);
476 1500 : if (di.vh
477 241 : && !logadd(&logbuf, ",\"vheap\":" BUNFMT,
478 : di.vhfree)) {
479 0 : BBPunfix(d->batCacheid);
480 0 : goto cleanup_and_exit;
481 : }
482 : /* if (!logadd(&logbuf, "\"debug\":\"%s\",", d->debugmessages)) goto cleanup_and_exit; */
483 1500 : BBPunfix(d->batCacheid);
484 : }
485 1663 : if (!logadd(&logbuf,
486 : ",\"bid\":%d"
487 : ",\"count\":" BUNFMT
488 : ",\"size\":" LLFMT, bid, cnt, total))
489 0 : goto cleanup_and_exit;
490 : } else {
491 1673 : tname = getTypeName(tpe);
492 3346 : ok = logadd(&logbuf,
493 : ",\"type\":\"%s\""
494 : ",\"const\":%d",
495 1673 : tname, isVarConstant(mb, getArg(pci, j)));
496 1673 : GDKfree(tname);
497 1673 : if (!ok)
498 0 : goto cleanup_and_exit;
499 1673 : cv = format_val2json(&stk->stk[getArg(pci, j)]);
500 1673 : if (cv)
501 1673 : ok = logadd(&logbuf, ",\"value\":%s", cv);
502 3346 : GDKfree(cv);
503 1673 : if (!ok)
504 0 : goto cleanup_and_exit;
505 : }
506 3336 : if (!logadd(&logbuf, ",\"eol\":%d", getVarEolife(mb, getArg(pci, j))))
507 0 : goto cleanup_and_exit;
508 : // if (!logadd(&logbuf, ",\"fixed\":%d", isVarFixed(mb,getArg(pci,j)))) return NULL;
509 3336 : if (!logadd(&logbuf, "}"))
510 0 : goto cleanup_and_exit;
511 : }
512 807 : if (!logadd(&logbuf, "]")) // end marker for arguments
513 0 : goto cleanup_and_exit;
514 : }
515 : }
516 843 : if (!logadd(&logbuf, "}\n")) // end marker
517 0 : goto cleanup_and_exit;
518 843 : return logbuf.logbuffer;
519 0 : cleanup_and_exit:
520 0 : logdel(&logbuf);
521 0 : return NULL;
522 : }
523 :
524 : /* the OS details on cpu load are read from /proc/stat
525 : * We should use an OS define to react to the maximal cores
526 : */
527 : #define MAXCORES 256
528 : #define LASTCPU (MAXCORES - 1)
529 : static struct {
530 : lng user, nice, system, idle, iowait;
531 : double load;
532 : } corestat[MAXCORES];
533 :
534 : static int
535 10 : getCPULoad(char cpuload[BUFSIZ])
536 : {
537 10 : int cpu, len = 0, i;
538 10 : lng user, nice, system, idle, iowait;
539 10 : size_t n;
540 10 : char buf[512], *s;
541 10 : static FILE *proc = NULL;
542 10 : lng newload;
543 :
544 10 : if (proc == NULL) {
545 1 : proc = fopen("/proc/stat", "r");
546 1 : if (proc == NULL) {
547 : /* unexpected */
548 : return -1;
549 : }
550 : } else
551 9 : rewind(proc);
552 :
553 130 : while (fgets(buf, (int) sizeof(buf), proc) != NULL) {
554 120 : n = strlen(buf);
555 120 : if (strncmp(buf, "cpu", 3) == 0) {
556 50 : s = buf + 3;
557 50 : if (*s == ' ') {
558 : cpu = LASTCPU; // the cpu totals stored here
559 : } else {
560 40 : cpu = atoi(s);
561 40 : if (cpu < 0 || cpu > LASTCPU)
562 10 : cpu = LASTCPU;
563 : }
564 50 : s = strchr(s, ' ');
565 50 : if (s == NULL) /* unexpected format of file */
566 : break;
567 110 : while (*s && isspace((unsigned char) *s))
568 60 : s++;
569 50 : i = sscanf(s, LLSCN " " LLSCN " " LLSCN " " LLSCN " " LLSCN, &user,
570 : &nice, &system, &idle, &iowait);
571 50 : if (i == 5) {
572 50 : newload = (user - corestat[cpu].user + nice - corestat[cpu].nice +
573 50 : system - corestat[cpu].system);
574 50 : if (newload)
575 9 : corestat[cpu].load = (double) newload / (newload + idle -
576 9 : corestat[cpu].idle + iowait -
577 9 : corestat[cpu].iowait);
578 50 : corestat[cpu].user = user;
579 50 : corestat[cpu].nice = nice;
580 50 : corestat[cpu].system = system;
581 50 : corestat[cpu].idle = idle;
582 50 : corestat[cpu].iowait = iowait;
583 : }
584 : }
585 :
586 140 : while (buf[n - 1] != '\n') {
587 20 : if (fgets(buf, (int) sizeof(buf), proc) == NULL)
588 0 : goto exitloop;
589 20 : n = strlen(buf);
590 : }
591 : }
592 10 : exitloop:
593 :
594 10 : if (cpuload == NULL)
595 : return 0;
596 : // identify core processing
597 0 : len += snprintf(cpuload, BUFSIZ, "[");
598 0 : for (cpu = 0; cpuload && cpu < LASTCPU && corestat[cpu].user; cpu++) {
599 0 : len += snprintf(cpuload + len, BUFSIZ - len, "%s%.2f", (cpu ? "," : ""),
600 : corestat[cpu].load);
601 : }
602 0 : (void) snprintf(cpuload + len, BUFSIZ - len, "]");
603 0 : return 0;
604 : }
605 :
606 : void
607 0 : profilerHeartbeatEvent(char *alter)
608 : {
609 0 : char cpuload[BUFSIZ];
610 0 : struct logbuf logbuf;
611 0 : lng usec;
612 0 : uint64_t microseconds;
613 :
614 0 : if (ATOMIC_GET(&hbdelay) == 0 || maleventstream == 0)
615 0 : return;
616 0 : usec = GDKusec();
617 0 : microseconds = (uint64_t) startup_time.tv_sec * 1000000 +
618 0 : (uint64_t) startup_time.tv_usec + (uint64_t) usec;
619 :
620 : /* get CPU load on beat boundaries only */
621 0 : if (getCPULoad(cpuload))
622 : return;
623 :
624 0 : logbuf = (struct logbuf) { 0 };
625 :
626 0 : if (!logadd(&logbuf, "{")) // fill in later with the event counter
627 : return;
628 0 : if (!GDKinmemory(0) && !GDKembedded()) {
629 0 : char *uuid = NULL, *err;
630 0 : if ((err = msab_getUUID(&uuid)) == NULL) {
631 0 : bool ok = logadd(&logbuf, "\"session\":\"%s\",", uuid);
632 0 : free(uuid);
633 0 : if (!ok)
634 0 : return;
635 : } else
636 0 : free(err);
637 : }
638 0 : if (!logadd(&logbuf, "\"clk\":" LLFMT ",\"ctime\":%" PRIu64 ",\"rss\":%zu,",
639 0 : usec, microseconds, MT_getrss() / 1024 / 1024))
640 : return;
641 : #ifdef HAVE_SYS_RESOURCE_H
642 0 : getrusage(RUSAGE_SELF, &infoUsage);
643 0 : if (infoUsage.ru_inblock - prevUsage.ru_inblock
644 0 : && !logadd(&logbuf, "\"inblock\":%ld,",
645 : infoUsage.ru_inblock - prevUsage.ru_inblock))
646 : return;
647 0 : if (infoUsage.ru_oublock - prevUsage.ru_oublock
648 0 : && !logadd(&logbuf, "\"oublock\":%ld,",
649 : infoUsage.ru_oublock - prevUsage.ru_oublock))
650 : return;
651 0 : if (infoUsage.ru_majflt - prevUsage.ru_majflt
652 0 : && !logadd(&logbuf, "\"majflt\":%ld,",
653 : infoUsage.ru_majflt - prevUsage.ru_majflt))
654 : return;
655 0 : if (infoUsage.ru_nswap - prevUsage.ru_nswap
656 0 : && !logadd(&logbuf, "\"nswap\":%ld,",
657 : infoUsage.ru_nswap - prevUsage.ru_nswap))
658 : return;
659 0 : if (infoUsage.ru_nvcsw - prevUsage.ru_nvcsw
660 0 : && !logadd(&logbuf, "\"nvcsw\":%ld,",
661 0 : infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +
662 0 : infoUsage.ru_nivcsw - prevUsage.ru_nivcsw))
663 : return;
664 0 : prevUsage = infoUsage;
665 : #endif
666 0 : if (!logadd(&logbuf, "\"state\":\"%s\"," "\"cpuload\":%s" "}\n", // end marker
667 : alter, cpuload))
668 : return;
669 0 : logjsonInternal(logbuf.logbuffer, true);
670 0 : logdel(&logbuf);
671 : }
672 :
673 : void
674 45 : profilerEvent(MalEvent *me, NonMalEvent *nme)
675 : {
676 45 : str event = NULL;
677 45 : assert(me == NULL || nme == NULL);
678 : /* ignore profiler monitoring cmds */
679 45 : if (me != NULL && me->cntxt != NULL && getModuleId(me->pci) == myname)
680 : return;
681 :
682 44 : MT_lock_set(&mal_profileLock);
683 44 : if (maleventstream) {
684 0 : if (me != NULL && me->mb != NULL && nme == NULL) {
685 0 : if (me->stk == NULL ||
686 0 : me->pci == NULL ||
687 0 : (profilerMode && me->mb && getPC(me->mb, me->pci) != 0)) {
688 0 : MT_lock_unset(&mal_profileLock);
689 0 : return; /* minimal mode */
690 : }
691 0 : event = prepareMalEvent(me->cntxt, me->mb, me->stk, me->pci,
692 : me->clk, me->duration);
693 : }
694 0 : if (me == NULL && nme != NULL && nme->phase != MAL_ENGINE) {
695 0 : event = prepareNonMalEvent(nme->cntxt, nme->phase, nme->clk,
696 : nme->tid, nme->ts, nme->state,
697 : nme->duration);
698 : }
699 0 : if (event) {
700 0 : logjsonInternal(event, true);
701 0 : GDKfree(event);
702 : }
703 : }
704 44 : MT_lock_unset(&mal_profileLock);
705 : }
706 :
707 : /* The first scheme dumps the events on a stream (and in the pool)
708 : */
709 : str
710 0 : openProfilerStream(Client cntxt, int m)
711 : {
712 : #ifdef HAVE_SYS_RESOURCE_H
713 0 : getrusage(RUSAGE_SELF, &infoUsage);
714 0 : prevUsage = infoUsage;
715 : #endif
716 0 : MT_lock_set(&mal_profileLock);
717 0 : if (myname == 0) {
718 0 : myname = putName("profiler");
719 0 : logjsonInternal(monet_characteristics, true);
720 : }
721 0 : if (maleventstream) {
722 : /* The DBA can always grab the stream, others have to wait */
723 0 : if (cntxt->user == MAL_ADMIN) {
724 0 : closeProfilerStream(cntxt);
725 : } else {
726 0 : MT_lock_unset(&mal_profileLock);
727 0 : throw(MAL, "profiler.start",
728 : "Profiler already running, stream not available");
729 : }
730 : }
731 : /* 4 activates profiler in minimal mode. 1 and 3 were used in prev MonetDB versions */
732 : /* 0 activates profiler in detailed mode */
733 0 : switch (m) {
734 0 : case 0:
735 0 : profilerStatus = -1;
736 0 : break;
737 0 : case 4:
738 0 : profilerStatus = -1;
739 0 : profilerMode = 1;
740 0 : break;
741 0 : default:
742 0 : MT_lock_unset(&mal_profileLock);
743 0 : throw(MAL, "profiler.openstream", "Undefined profiler mode option");
744 : }
745 0 : maleventstream = cntxt->fdout;
746 0 : profilerUser = cntxt->user;
747 :
748 0 : MT_lock_unset(&mal_profileLock);
749 0 : return MAL_SUCCEED;
750 : }
751 :
752 : str
753 1 : closeProfilerStream(Client cntxt)
754 : {
755 1 : (void) cntxt;
756 1 : maleventstream = NULL;
757 1 : profilerStatus = 0;
758 1 : profilerMode = 0;
759 1 : profilerUser = 0;
760 1 : return MAL_SUCCEED;
761 : }
762 :
763 : /* the second scheme is to collect the profile
764 : * events in a local table for direct SQL inspection
765 : */
766 : str
767 1 : startProfiler(Client cntxt)
768 : {
769 : #ifdef HAVE_SYS_RESOURCE_H
770 1 : getrusage(RUSAGE_SELF, &infoUsage);
771 1 : prevUsage = infoUsage;
772 : #endif
773 1 : (void) cntxt;
774 :
775 1 : MT_lock_set(&mal_profileLock);
776 1 : if (maleventstream) {
777 0 : MT_lock_unset(&mal_profileLock);
778 0 : throw(MAL, "profiler.start",
779 : "Profiler already running, stream not available");
780 : }
781 1 : if (myname == 0) {
782 1 : myname = putName("profiler");
783 : }
784 1 : profilerStatus = 1;
785 1 : logjsonInternal(monet_characteristics, true);
786 1 : MT_lock_unset(&mal_profileLock);
787 : // reset the trace table
788 1 : clearTrace(cntxt);
789 :
790 1 : return MAL_SUCCEED;
791 : }
792 :
793 : /* SQL tracing is simplified, because it only collects the events in the temporary table.
794 : */
795 : str
796 22 : startTrace(Client cntxt)
797 : {
798 22 : cntxt->sqlprofiler = TRUE;
799 22 : clearTrace(cntxt);
800 22 : return MAL_SUCCEED;
801 : }
802 :
803 : str
804 26 : stopTrace(Client cntxt)
805 : {
806 26 : cntxt->sqlprofiler = FALSE;
807 26 : return MAL_SUCCEED;
808 : }
809 :
810 : str
811 330 : stopProfiler(Client cntxt)
812 : {
813 330 : MT_lock_set(&mal_profileLock);
814 330 : if (profilerStatus)
815 1 : profilerStatus = 0;
816 330 : setHeartbeat(0); // stop heartbeat
817 330 : if (cntxt)
818 1 : closeProfilerStream(cntxt);
819 330 : MT_lock_unset(&mal_profileLock);
820 330 : return MAL_SUCCEED;
821 : }
822 :
823 : /*
824 : * SQL profile traces
825 : * The events being captured are stored in client specific BATs.
826 : * They are made persistent to accumate information over
827 : * multiple sessions. This means it has to be explicitly reset
828 : * to avoid disc overflow using profiler.reset().
829 : *
830 : * The information returned for the trace is purposely limited
831 : * to the ticks and the final MAL instruction.
832 : * For more detailed analysis, the stethoscope should be used.
833 : */
834 :
835 : static void
836 14 : _cleanupProfiler(Client cntxt)
837 : {
838 14 : BBPreclaim(cntxt->profticks);
839 14 : BBPreclaim(cntxt->profstmt);
840 14 : BBPreclaim(cntxt->profevents);
841 14 : cntxt->profticks = cntxt->profstmt = cntxt->profevents = NULL;
842 14 : }
843 :
844 : static inline BAT *
845 120 : TRACEcreate(int tt)
846 : {
847 120 : return COLnew(0, tt, 1 << 10, TRANSIENT);
848 : }
849 :
850 : static void
851 88 : initTrace(Client cntxt)
852 : {
853 88 : MT_lock_set(&mal_profileLock);
854 88 : if (cntxt->profticks) {
855 48 : MT_lock_unset(&mal_profileLock);
856 48 : return; /* already initialized */
857 : }
858 40 : cntxt->profticks = TRACEcreate(TYPE_lng);
859 40 : cntxt->profstmt = TRACEcreate(TYPE_str);
860 40 : cntxt->profevents = TRACEcreate(TYPE_str);
861 40 : if (cntxt->profticks == NULL || cntxt->profstmt == NULL
862 40 : || cntxt->profevents == NULL)
863 0 : _cleanupProfiler(cntxt);
864 40 : MT_lock_unset(&mal_profileLock);
865 : }
866 :
867 : int
868 61 : TRACEtable(Client cntxt, BAT **r)
869 : {
870 61 : initTrace(cntxt);
871 61 : MT_lock_set(&mal_profileLock);
872 61 : if (cntxt->profticks == NULL) {
873 0 : MT_lock_unset(&mal_profileLock);
874 0 : return -1; /* not initialized */
875 : }
876 61 : r[0] = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT);
877 61 : r[1] = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT);
878 61 : r[2] = COLcopy(cntxt->profevents, cntxt->profevents->ttype, false,
879 : TRANSIENT);
880 61 : MT_lock_unset(&mal_profileLock);
881 61 : if (r[0] == NULL || r[1] == NULL || r[2] == NULL) {
882 0 : BBPreclaim(r[0]);
883 0 : BBPreclaim(r[1]);
884 0 : BBPreclaim(r[2]);
885 0 : return -1;
886 : }
887 : return 3;
888 : }
889 :
890 : BAT *
891 8 : getTrace(Client cntxt, const char *nme)
892 : {
893 8 : BAT *bn = NULL;
894 :
895 8 : MT_lock_set(&mal_profileLock);
896 8 : if (cntxt->profticks) {
897 8 : if (strcmp(nme, "usec") == 0) {
898 4 : bn = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false,
899 : TRANSIENT);
900 4 : } else if (strcmp(nme, "stmt") == 0) {
901 4 : bn = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false,
902 : TRANSIENT);
903 0 : } else if (strcmp(nme, "events") == 0) {
904 0 : bn = COLcopy(cntxt->profevents, cntxt->profevents->ttype, false,
905 : TRANSIENT);
906 : }
907 : }
908 8 : MT_lock_unset(&mal_profileLock);
909 8 : return bn;
910 : }
911 :
912 : void
913 27 : clearTrace(Client cntxt)
914 : {
915 27 : (void) cntxt;
916 27 : MT_lock_set(&mal_profileLock);
917 27 : if (cntxt->profticks == NULL) {
918 13 : MT_lock_unset(&mal_profileLock);
919 13 : initTrace(cntxt);
920 13 : return; /* not initialized */
921 : }
922 : /* drop all trace tables */
923 14 : _cleanupProfiler(cntxt);
924 14 : MT_lock_unset(&mal_profileLock);
925 14 : initTrace(cntxt);
926 : }
927 :
928 : str
929 0 : cleanupTraces(Client cntxt)
930 : {
931 0 : clearTrace(cntxt);
932 0 : return MAL_SUCCEED;
933 : }
934 :
935 : void
936 843 : sqlProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
937 : lng clk, lng ticks)
938 : {
939 843 : str stmt, c, ev;
940 843 : int errors = 0;
941 :
942 843 : if (cntxt->profticks == NULL)
943 : return;
944 :
945 : /* generate actual call statement */
946 843 : stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL | LIST_MAL_ALGO);
947 843 : c = stmt;
948 :
949 : /* unclear why we needed this. OLD?
950 : while (c && *c && (isspace((unsigned char)*c) || *c == '!'))
951 : c++;
952 : */
953 :
954 843 : ev = prepareMalEvent(cntxt, mb, stk, pci, clk, ticks);
955 : // keep it a short transaction
956 843 : MT_lock_set(&mal_profileLock);
957 843 : if (cntxt->profticks == NULL) {
958 0 : MT_lock_unset(&mal_profileLock);
959 0 : GDKfree(stmt);
960 0 : return;
961 : }
962 843 : errors += BUNappend(cntxt->profticks, &ticks, false) != GDK_SUCCEED;
963 843 : errors += BUNappend(cntxt->profstmt, c, false) != GDK_SUCCEED;
964 843 : errors += BUNappend(cntxt->profevents, ev ? ev : str_nil,
965 843 : false) != GDK_SUCCEED;
966 843 : if (errors > 0) {
967 : /* stop profiling if an error occurred */
968 0 : cntxt->sqlprofiler = FALSE;
969 : }
970 :
971 843 : MT_lock_unset(&mal_profileLock);
972 843 : GDKfree(stmt);
973 843 : GDKfree(ev);
974 : }
975 :
976 : lng
977 0 : getDiskWrites(void)
978 : {
979 : #ifdef HAVE_SYS_RESOURCE_H
980 0 : struct rusage infoUsage;
981 0 : getrusage(RUSAGE_SELF, &infoUsage);
982 0 : return infoUsage.ru_oublock;
983 : #else
984 : return 0;
985 : #endif
986 : }
987 :
988 : lng
989 0 : getDiskReads(void)
990 : {
991 : #ifdef HAVE_SYS_RESOURCE_H
992 0 : struct rusage infoUsage;
993 0 : getrusage(RUSAGE_SELF, &infoUsage);
994 0 : return infoUsage.ru_inblock;
995 : #else
996 : return 0;
997 : #endif
998 : }
999 :
1000 : lng
1001 0 : getUserTime(void)
1002 : {
1003 : #ifdef HAVE_TIMES
1004 0 : struct tms newTms;
1005 0 : times(&newTms);
1006 0 : return newTms.tms_utime;
1007 : #else
1008 : return 0;
1009 : #endif
1010 : }
1011 :
1012 : lng
1013 0 : getSystemTime(void)
1014 : {
1015 : #ifdef HAVE_TIMES
1016 0 : struct tms newTms;
1017 0 : times(&newTms);
1018 0 : return newTms.tms_stime;
1019 : #else
1020 : return 0;
1021 : #endif
1022 : }
1023 :
1024 : /* Calculate a pessimistic size of the disk storage */
1025 : lng
1026 0 : getDiskSpace(void)
1027 : {
1028 0 : BAT *b;
1029 0 : bat i;
1030 0 : lng size = 0;
1031 :
1032 0 : for (i = 1; i < getBBPsize(); i++)
1033 0 : if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))) {
1034 0 : b = BATdescriptor(i);
1035 0 : if (b) {
1036 0 : size += sizeof(BAT);
1037 :
1038 0 : MT_lock_set(&b->theaplock);
1039 0 : if (!isVIEW(b)) {
1040 0 : BUN cnt = BATcount(b);
1041 :
1042 : /* the upperbound is used for the heaps */
1043 0 : if (b->tvheap)
1044 0 : size += HEAPvmsize(b->tvheap);
1045 0 : MT_lock_unset(&b->theaplock);
1046 :
1047 0 : size += tailsize(b, cnt);
1048 0 : if (b->thash)
1049 0 : size += sizeof(BUN) * cnt;
1050 : /* also add the size of an ordered index */
1051 0 : if (b->torderidx)
1052 0 : size += HEAPvmsize(b->torderidx);
1053 : } else {
1054 0 : MT_lock_unset(&b->theaplock);
1055 : }
1056 0 : BBPunfix(i);
1057 : }
1058 : }
1059 0 : return size;
1060 : }
1061 :
1062 :
1063 : void
1064 10 : profilerGetCPUStat(lng *user, lng *nice, lng *sys, lng *idle, lng *iowait)
1065 : {
1066 10 : (void) getCPULoad(NULL);
1067 10 : *user = corestat[LASTCPU].user;
1068 10 : *nice = corestat[LASTCPU].nice;
1069 10 : *sys = corestat[LASTCPU].system;
1070 10 : *idle = corestat[LASTCPU].idle;
1071 10 : *iowait = corestat[LASTCPU].iowait;
1072 10 : }
1073 :
1074 : /* the heartbeat process produces a ping event once every X milliseconds */
1075 : static MT_Id hbthread;
1076 : static ATOMIC_TYPE hbrunning = ATOMIC_VAR_INIT(0);
1077 :
1078 : static void
1079 330 : profilerHeartbeat(void *dummy)
1080 : {
1081 330 : int t;
1082 342 : const int timeout = ATOMIC_GET(&GDKdebug) & TESTINGMASK ? 10 : 25;
1083 :
1084 330 : (void) dummy;
1085 330 : for (;;) {
1086 : /* wait until you need this info */
1087 330 : MT_thread_setworking("sleeping");
1088 148237 : while (ATOMIC_GET(&hbdelay) == 0 || maleventstream == NULL) {
1089 148237 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1090 329 : return;
1091 147908 : MT_sleep_ms(timeout);
1092 : }
1093 0 : for (t = (int) ATOMIC_GET(&hbdelay); t > 0; t -= timeout) {
1094 0 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1095 0 : return;
1096 0 : MT_sleep_ms(t > timeout ? timeout : t);
1097 : }
1098 0 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1099 0 : return;
1100 0 : MT_thread_setworking("pinging");
1101 0 : profilerHeartbeatEvent("ping");
1102 : }
1103 : }
1104 :
1105 : void
1106 659 : setHeartbeat(int delay)
1107 : {
1108 659 : if (delay < 0) {
1109 329 : ATOMIC_SET(&hbrunning, 0);
1110 329 : if (hbthread)
1111 329 : MT_join_thread(hbthread);
1112 329 : return;
1113 : }
1114 330 : if (delay > 0 && delay <= 10)
1115 0 : delay = 10;
1116 330 : ATOMIC_SET(&hbdelay, delay);
1117 : }
1118 :
1119 : /* TODO getprofilerlimit and setprofilerlimit functions */
1120 :
1121 : int
1122 2 : getprofilerlimit(void)
1123 : {
1124 2 : return 0;
1125 : }
1126 :
1127 : void
1128 0 : setprofilerlimit(int limit)
1129 : {
1130 0 : (void) limit;
1131 0 : }
1132 :
1133 : void
1134 330 : initProfiler(void)
1135 : {
1136 330 : gettimeofday(&startup_time, NULL);
1137 330 : }
1138 :
1139 : void
1140 330 : initHeartbeat(void)
1141 : {
1142 330 : ATOMIC_SET(&hbrunning, 1);
1143 330 : if (MT_create_thread(&hbthread, profilerHeartbeat, NULL, MT_THR_JOINABLE,
1144 : "heartbeat") < 0) {
1145 : /* it didn't happen */
1146 0 : hbthread = 0;
1147 0 : ATOMIC_SET(&hbrunning, 0);
1148 : }
1149 330 : }
|