Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /* (c) M.L. Kersten
14 : * Performance tracing
15 : * The stethoscope/tachograph and tomograph performance monitors have exclusive
16 : * access to a single event stream, which avoids concurrency conflicts amongst
17 : * clients.
18 : * It also avoid cluthered event records on the stream. Since this event stream
19 : * is owned by a client, we should ensure that the profiler is automatically
20 : * reset once the owner leaves.
21 : */
22 : #include "monetdb_config.h"
23 : #include "mutils.h" /* mercurial_revision */
24 : #include "msabaoth.h" /* msab_getUUID */
25 : #include "mal_authorize.h"
26 : #include "mal_function.h"
27 : #include "mal_listing.h"
28 : #include "mal_profiler.h"
29 : #include "mal_runtime.h"
30 : #include "mal_utils.h"
31 : #include "mal_resource.h"
32 : #include "mal_internal.h"
33 :
34 : #ifdef HAVE_SYS_TIME_H
35 : #include <sys/time.h>
36 : #endif
37 :
38 : #include <string.h>
39 :
40 : static const char *myname = 0; // avoid tracing the profiler module
41 :
42 : /* The JSON rendering can be either using '\n' separators between
43 : * each key:value pair or as a single line.
44 : * The current stethoscope implementation requires the first option and
45 : * also the term rendering to be set to ''
46 : */
47 :
48 : int profilerStatus = 0; /* global flag profiler status */
49 : int profilerMode = 0; /* global flag profiler mode, minimal or detailed */
50 : static oid profilerUser; /* keep track on who has claimed the channel */
51 :
52 : static struct timeval startup_time;
53 :
54 : static ATOMIC_TYPE hbdelay = ATOMIC_VAR_INIT(0);
55 :
56 : #ifdef HAVE_SYS_RESOURCE_H
57 : #include <sys/resource.h>
58 : struct rusage infoUsage;
59 : static struct rusage prevUsage;
60 : #endif
61 :
62 : #define LOGLEN 8192
63 :
64 : static void
65 2 : logjsonInternal(char *logbuffer, bool flush)
66 : {
67 2 : size_t len;
68 2 : len = strlen(logbuffer);
69 :
70 2 : (void) mnstr_write(maleventstream, logbuffer, 1, len);
71 2 : if (flush)
72 1 : (void) mnstr_flush(maleventstream, MNSTR_FLUSH_DATA);
73 2 : }
74 :
75 : /*
76 : * We use a buffer (`logbuffer`) where we incrementally create the output JSON
77 : * object. Initially we allocate LOGLEN (8K)
78 : * bytes and we keep the capacity of the buffer (`logcap`) and the length of the
79 : * current string (`loglen`).
80 : *
81 : * We use the `logadd` function to add data to our buffer (usually key-value
82 : * pairs). This macro offers an interface similar to printf.
83 : *
84 : * The first snprintf below happens in a statically allocated buffer that might
85 : * be much smaller than logcap. We do not care. We only need to perform this
86 : * snprintf to get the actual length of the string that is to be produced.
87 : *
88 : * There are three cases:
89 : *
90 : * 1. The new string fits in the current buffer -> we just update the buffer
91 : *
92 : * 2. The new string does not fit in the current buffer, but is smaller than the
93 : * capacity of the buffer -> we output the current contents of the buffer and
94 : * start at the beginning.
95 : *
96 : * 3. The new string exceeds the current capacity of the buffer -> we output the
97 : * current contents and reallocate the buffer. The new capacity is 1.5 times the
98 : * length of the new string.
99 : */
100 : struct logbuf {
101 : char *logbuffer;
102 : char *logbase;
103 : size_t loglen;
104 : size_t logcap;
105 : };
106 :
107 : static inline void
108 964 : lognew(struct logbuf *logbuf)
109 : {
110 964 : logbuf->loglen = 0;
111 964 : logbuf->logbase = logbuf->logbuffer;
112 964 : *logbuf->logbase = 0;
113 964 : }
114 :
115 : static inline void
116 0 : logdel(struct logbuf *logbuf)
117 : {
118 0 : GDKfree(logbuf->logbuffer);
119 0 : logbuf->logbuffer = NULL;
120 0 : }
121 :
122 : static bool logadd(struct logbuf *logbuf,
123 : _In_z_ _Printf_format_string_ const char *fmt, ...)
124 : __attribute__((__format__(__printf__, 2, 3)))
125 : __attribute__((__warn_unused_result__));
126 : static bool
127 37656 : logadd(struct logbuf *logbuf, const char *fmt, ...)
128 : {
129 37656 : char tmp_buff[LOGLEN];
130 37656 : int tmp_len;
131 37656 : va_list va;
132 :
133 37656 : va_start(va, fmt);
134 37656 : tmp_len = vsnprintf(tmp_buff, sizeof(tmp_buff), fmt, va);
135 37656 : va_end(va);
136 37656 : if (tmp_len < 0) {
137 0 : logdel(logbuf);
138 0 : return false;
139 : }
140 37656 : if (logbuf->loglen + (size_t) tmp_len >= logbuf->logcap) {
141 959 : if ((size_t) tmp_len >= logbuf->logcap) {
142 : /* includes first time when logbuffer == NULL and logcap == 0 */
143 958 : char *alloc_buff;
144 958 : if (logbuf->loglen > 0)
145 0 : logjsonInternal(logbuf->logbuffer, false);
146 958 : logbuf->logcap = (size_t) tmp_len + (size_t) tmp_len / 2;
147 958 : if (logbuf->logcap < LOGLEN)
148 958 : logbuf->logcap = LOGLEN;
149 958 : alloc_buff = GDKrealloc(logbuf->logbuffer, logbuf->logcap);
150 963 : if (alloc_buff == NULL) {
151 0 : TRC_ERROR(MAL_SERVER,
152 : "Profiler JSON buffer reallocation failure\n");
153 0 : logdel(logbuf);
154 0 : return false;
155 : }
156 963 : logbuf->logbuffer = alloc_buff;
157 963 : lognew(logbuf);
158 : } else {
159 1 : logjsonInternal(logbuf->logbuffer, false);
160 1 : lognew(logbuf);
161 : }
162 : }
163 37661 : if (tmp_len > 0) {
164 37660 : va_start(va, fmt);
165 37660 : logbuf->loglen += vsnprintf(logbuf->logbase + logbuf->loglen,
166 37660 : logbuf->logcap - logbuf->loglen, fmt, va);
167 37660 : va_end(va);
168 : }
169 : return true;
170 : }
171 :
172 : static str phase_descriptions[] = {
173 : [CLIENT_START] = "session_start",
174 : [CLIENT_END] = "session_end",
175 : [TEXT_TO_SQL] = "text_to_sql",
176 : [SQL_TO_REL] = "sql_to_rel",
177 : [REL_OPT] = "rel_opt",
178 : [REL_TO_MAL] = "rel_to_mal",
179 : [MAL_OPT] = "mal_opt",
180 : [MAL_ENGINE] = "mal_engine",
181 : [COMMIT] = "trans_commit",
182 : [ROLLBACK] = "trans_rollback",
183 : [CONFLICT] = "trans_conflict"
184 : };
185 :
186 : static str
187 0 : prepareNonMalEvent(Client cntxt, enum event_phase phase, ulng clk, ulng *tstart,
188 : ulng *tend, int state, ulng duration)
189 : {
190 0 : oid *tag = NULL;
191 0 : str query = NULL;
192 0 : struct logbuf logbuf = { 0 };
193 :
194 0 : uint64_t mclk = (uint64_t) clk -
195 0 : ((uint64_t) startup_time.tv_sec * 1000000 -
196 0 : (uint64_t) startup_time.tv_usec);
197 :
198 0 : assert(cntxt);
199 0 : int sessionid = cntxt->idx;
200 0 : if (cntxt->curprg)
201 0 : tag = &cntxt->curprg->def->tag;
202 0 : if (cntxt->query
203 0 : && (query = mal_quote(cntxt->query, strlen(cntxt->query))) == NULL)
204 : return NULL;
205 :
206 0 : if (!logadd(&logbuf, "{\"sessionid\":\"%d\"", sessionid))
207 0 : goto cleanup_and_exit;
208 0 : if (!logadd(&logbuf, ", \"clk\":" ULLFMT "", mclk))
209 0 : goto cleanup_and_exit;
210 0 : if (!logadd(&logbuf, ", \"thread\":%zu, \"phase\":\"%s\"",
211 : MT_getpid(), phase_descriptions[phase]))
212 0 : goto cleanup_and_exit;
213 0 : if (tstart && !logadd(&logbuf, ", \"tstart\":" ULLFMT, *tstart))
214 0 : goto cleanup_and_exit;
215 0 : if (tend && !logadd(&logbuf, ", \"tend\":" ULLFMT, *tend))
216 0 : goto cleanup_and_exit;
217 0 : if (tag && !logadd(&logbuf, ", \"tag\":" OIDFMT, *tag))
218 0 : goto cleanup_and_exit;
219 0 : if (query && phase == TEXT_TO_SQL
220 0 : && !logadd(&logbuf, ", \"query\":\"%s\"", query))
221 0 : goto cleanup_and_exit;
222 0 : if (state != 0 && !logadd(&logbuf, ", \"state\":\"error\""))
223 0 : goto cleanup_and_exit;
224 0 : if (!logadd(&logbuf, ", \"usec\":" ULLFMT "}\n", duration))
225 0 : goto cleanup_and_exit;
226 0 : GDKfree(query);
227 0 : return logbuf.logbuffer;
228 0 : cleanup_and_exit:
229 0 : GDKfree(query);
230 0 : logdel(&logbuf);
231 0 : return NULL;
232 : }
233 :
234 : static inline str
235 1935 : format_val2json(const ValPtr res)
236 : {
237 1935 : char *buf = NULL;
238 1935 : size_t sz = 0;
239 :
240 3855 : if (BATatoms[res->vtype].atomNull &&
241 1930 : (!VALget(res)
242 1920 : || BATatoms[res->vtype].atomCmp(VALget(res),
243 : BATatoms[res->vtype].atomNull) == 0))
244 85 : return GDKstrdup("\"nil\"");
245 :
246 1838 : bool use_external = true;
247 :
248 1838 : switch (res->vtype) {
249 519 : case TYPE_bte:
250 : case TYPE_sht:
251 : case TYPE_int:
252 : case TYPE_flt:
253 : case TYPE_dbl:
254 : case TYPE_lng:
255 : #ifdef HAVE_HGE
256 : case TYPE_hge:
257 : #endif
258 519 : use_external = false;
259 : }
260 :
261 1838 : if ((*BATatoms[res->vtype].atomToStr) (&buf, &sz, VALptr(res),
262 : use_external) < 0)
263 : return NULL;
264 :
265 1834 : if (!use_external || res->vtype == TYPE_str)
266 958 : return buf;
267 :
268 876 : ValRecord val;
269 876 : if (VALinit(&val, TYPE_str, buf) == NULL) {
270 0 : GDKfree(buf);
271 0 : return NULL;
272 : }
273 :
274 876 : GDKfree(buf);
275 :
276 876 : char *buf2;
277 876 : buf2 = VALformat(&val);
278 876 : VALclear(&val);
279 :
280 876 : return buf2;
281 : }
282 :
283 : static str
284 958 : prepareMalEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
285 : lng clk, lng ticks)
286 : {
287 958 : struct logbuf logbuf;
288 958 : str c;
289 958 : uint64_t mclk;
290 958 : bool ok;
291 958 : const char *algo = MT_thread_getalgorithm();
292 :
293 : /* The stream of events can be complete read by the DBA,
294 : * all other users can only see events assigned to their account
295 : */
296 960 : if (profilerUser != MAL_ADMIN && profilerUser != cntxt->user)
297 : return NULL;
298 :
299 960 : logbuf = (struct logbuf) { 0 };
300 :
301 960 : mclk = (uint64_t) clk - ((uint64_t) startup_time.tv_sec * 1000000 -
302 960 : (uint64_t) startup_time.tv_usec);
303 : /* make profile event tuple */
304 1925 : if (!logadd(&logbuf, "{" // fill in later with the event counter
305 : "\"sessionid\":\"%d\""
306 : ",\"clk\":%" PRIu64 ""
307 : ",\"thread\":%zu"
308 : ",\"phase\":\"%s\""
309 : ",\"pc\":%d"
310 : ",\"tag\":" OIDFMT,
311 : cntxt->idx,
312 : mclk,
313 : MT_getpid(),
314 : phase_descriptions[MAL_ENGINE],
315 960 : mb ? getPC(mb, pci) : 0, stk ? stk->tag : 0))
316 0 : goto cleanup_and_exit;
317 954 : if (pci->modname
318 923 : && !logadd(&logbuf, ",\"module\":\"%s\"",
319 : pci->modname ? pci->modname : ""))
320 0 : goto cleanup_and_exit;
321 955 : if (pci->fcnname
322 922 : && !logadd(&logbuf, ",\"function\":\"%s\"",
323 : pci->fcnname ? pci->fcnname : ""))
324 0 : goto cleanup_and_exit;
325 955 : if (pci->barrier
326 4 : && !logadd(&logbuf, ",\"barrier\":\"%s\"", operatorName(pci->barrier)))
327 0 : goto cleanup_and_exit;
328 1027 : if ((pci->token < FCNcall || pci->token > PATcall) &&
329 72 : !logadd(&logbuf, ",\"operator\":\"%s\"", operatorName(pci->token)))
330 0 : goto cleanup_and_exit;
331 955 : if (!logadd(&logbuf, ",\"usec\":" LLFMT, ticks))
332 0 : goto cleanup_and_exit;
333 958 : if (algo && !logadd(&logbuf, ",\"algorithm\":\"%s\"", algo))
334 0 : goto cleanup_and_exit;
335 958 : if (mb && pci->modname && pci->fcnname) {
336 922 : int j;
337 :
338 922 : if (profilerMode == 0 && stk) {
339 922 : if (!logadd(&logbuf, ",\"args\":["))
340 0 : goto cleanup_and_exit;
341 926 : char name[IDLENGTH] = { 0 };
342 4786 : for (j = 0; j < pci->argc; j++) {
343 3865 : int tpe = getVarType(mb, getArg(pci, j));
344 3865 : str tname = 0, cv;
345 3865 : lng total = 0;
346 3865 : BUN cnt = 0;
347 3865 : bat bid = 0;
348 :
349 3865 : if (j == 0) {
350 : // No comma at the beginning
351 923 : if (!logadd(&logbuf, "{"))
352 0 : goto cleanup_and_exit;
353 : } else {
354 2942 : if (!logadd(&logbuf, ",{"))
355 0 : goto cleanup_and_exit;
356 : }
357 7763 : if (!logadd(&logbuf, "\"%s\":%d,\"var\":\"%s\"",
358 3879 : j < pci->retc ? "ret" : "arg", j,
359 : getVarNameIntoBuffer(mb, getArg(pci, j), name)))
360 0 : goto cleanup_and_exit;
361 : //c = getVarName(mb, getArg(pci, j), name);
362 3858 : if (getVarSTC(mb, getArg(pci, j))) {
363 0 : InstrPtr stc = getInstrPtr(mb, getVarSTC(mb, getArg(pci, j)));
364 0 : if (stc && getModuleId(stc)
365 0 : && strcmp(getModuleId(stc), "sql") == 0
366 0 : && strncmp(getFunctionId(stc), "bind", 4) == 0
367 0 : && !logadd(&logbuf, ",\"alias\":\"%s.%s.%s\"",
368 0 : getVarConstant(mb,
369 : getArg(stc,
370 : stc->retc +
371 : 1)).val.sval,
372 0 : getVarConstant(mb,
373 : getArg(stc,
374 : stc->retc +
375 : 2)).val.sval,
376 0 : getVarConstant(mb,
377 : getArg(stc,
378 : stc->retc +
379 : 3)).val.sval))
380 0 : goto cleanup_and_exit;
381 : }
382 3858 : if (isaBatType(tpe)) {
383 1964 : BAT *d = BATdescriptor(bid = stk->stk[getArg(pci, j)].val.bval);
384 1975 : tname = getTypeName(getBatType(tpe));
385 1975 : ok = logadd(&logbuf, ",\"type\":\"bat[:%s]\"", tname);
386 1968 : GDKfree(tname);
387 1974 : if (!ok) {
388 0 : BBPreclaim(d);
389 0 : goto cleanup_and_exit;
390 : }
391 1974 : if (d) {
392 1787 : MT_lock_set(&d->theaplock);
393 1788 : BATiter di = bat_iterator_nolock(d);
394 : /* outside the lock we cannot dereference di.h or di.vh,
395 : * but we can use all values without dereference and
396 : * without further locking */
397 1788 : MT_lock_unset(&d->theaplock);
398 1787 : cnt = di.count;
399 1787 : if (isVIEW(d)) {
400 262 : BAT *v = BBP_desc(VIEWtparent(d));
401 262 : MT_lock_set(&v->theaplock);
402 262 : bool vtransient = v->batTransient;
403 262 : MT_lock_unset(&v->theaplock);
404 524 : if (!logadd(&logbuf,
405 : ",\"view\":\"true\""
406 : ",\"parent\":%d"
407 : ",\"seqbase\":" BUNFMT
408 : ",\"mode\":\"%s\"",
409 262 : VIEWtparent(d),
410 : d->hseqbase,
411 : vtransient ? "transient" :
412 : "persistent")) {
413 0 : BBPunfix(d->batCacheid);
414 0 : goto cleanup_and_exit;
415 : }
416 : } else {
417 1525 : if (!logadd(&logbuf, ",\"mode\":\"%s\"",
418 1525 : (di.transient ? "transient" : "persistent"))) {
419 0 : BBPunfix(d->batCacheid);
420 0 : goto cleanup_and_exit;
421 : }
422 : }
423 1780 : if (!logadd(&logbuf,
424 : ",\"sorted\":%d"
425 : ",\"revsorted\":%d"
426 : ",\"nonil\":%d"
427 : ",\"nil\":%d"
428 : ",\"key\":%d",
429 1784 : di.sorted,
430 1784 : di.revsorted, di.nonil, di.nil, di.key)) {
431 0 : BBPunfix(d->batCacheid);
432 0 : goto cleanup_and_exit;
433 : }
434 1948 : if ((di.minpos != BUN_NONE &&
435 168 : !logadd(&logbuf, ",\"minpos\":\"" BUNFMT "\"",
436 1780 : di.minpos)) || (di.maxpos != BUN_NONE
437 170 : && !logadd(&logbuf,
438 : ",\"maxpos\":\""
439 : BUNFMT "\"",
440 : di.maxpos))
441 1780 : || (di.unique_est != 0
442 443 : && !logadd(&logbuf, ",\"nestimate\":\"%g\"",
443 : di.unique_est))) {
444 0 : BBPunfix(d->batCacheid);
445 0 : goto cleanup_and_exit;
446 : }
447 :
448 1781 : cv = VALformat(&stk->stk[getArg(pci, j)]);
449 1782 : if (cv) {
450 1782 : c = strchr(cv, '>');
451 1782 : if (c) /* unlikely that this isn't true */
452 0 : *c = 0;
453 1782 : ok = logadd(&logbuf, ",\"file\":\"%s\"", cv + 1);
454 1779 : GDKfree(cv);
455 1789 : if (!ok) {
456 0 : BBPunfix(d->batCacheid);
457 0 : goto cleanup_and_exit;
458 : }
459 : }
460 1789 : total += cnt << di.shift;
461 1789 : if (!logadd(&logbuf, ",\"width\":%d", di.width)) {
462 0 : BBPunfix(d->batCacheid);
463 0 : goto cleanup_and_exit;
464 : }
465 : /* keeping information about the individual auxiliary heaps is helpful during analysis. */
466 1778 : MT_rwlock_rdlock(&d->thashlock);
467 1789 : if (d->thash
468 0 : && !logadd(&logbuf, ",\"hash\":" LLFMT,
469 0 : (lng) hashinfo(d->thash,
470 : d->batCacheid))) {
471 0 : MT_rwlock_rdunlock(&d->thashlock);
472 0 : BBPunfix(d->batCacheid);
473 0 : goto cleanup_and_exit;
474 : }
475 1789 : MT_rwlock_rdunlock(&d->thashlock);
476 1786 : if (di.vh
477 310 : && !logadd(&logbuf, ",\"vheap\":" BUNFMT,
478 : di.vhfree)) {
479 0 : BBPunfix(d->batCacheid);
480 0 : goto cleanup_and_exit;
481 : }
482 1784 : if (d->timprints
483 0 : && !logadd(&logbuf, ",\"imprints\":" LLFMT,
484 0 : (lng) IMPSimprintsize(d))) {
485 0 : BBPunfix(d->batCacheid);
486 0 : goto cleanup_and_exit;
487 : }
488 : /* if (!logadd(&logbuf, "\"debug\":\"%s\",", d->debugmessages)) goto cleanup_and_exit; */
489 1784 : BBPunfix(d->batCacheid);
490 : }
491 1974 : if (!logadd(&logbuf,
492 : ",\"bid\":%d"
493 : ",\"count\":" BUNFMT
494 : ",\"size\":" LLFMT, bid, cnt, total))
495 0 : goto cleanup_and_exit;
496 : } else {
497 1894 : tname = getTypeName(tpe);
498 3811 : ok = logadd(&logbuf,
499 : ",\"type\":\"%s\""
500 : ",\"const\":%d",
501 1922 : tname, isVarConstant(mb, getArg(pci, j)));
502 1889 : GDKfree(tname);
503 1930 : if (!ok)
504 0 : goto cleanup_and_exit;
505 1930 : cv = format_val2json(&stk->stk[getArg(pci, j)]);
506 1917 : if (cv)
507 1917 : ok = logadd(&logbuf, ",\"value\":%s", cv);
508 3808 : GDKfree(cv);
509 1930 : if (!ok)
510 0 : goto cleanup_and_exit;
511 : }
512 3895 : if (!logadd(&logbuf, ",\"eol\":%d", getVarEolife(mb, getArg(pci, j))))
513 0 : goto cleanup_and_exit;
514 : // if (!logadd(&logbuf, ",\"fixed\":%d", isVarFixed(mb,getArg(pci,j)))) return NULL;
515 3858 : if (!logadd(&logbuf, "}"))
516 0 : goto cleanup_and_exit;
517 : }
518 921 : if (!logadd(&logbuf, "]")) // end marker for arguments
519 0 : goto cleanup_and_exit;
520 : }
521 : }
522 959 : if (!logadd(&logbuf, "}\n")) // end marker
523 0 : goto cleanup_and_exit;
524 962 : return logbuf.logbuffer;
525 0 : cleanup_and_exit:
526 0 : logdel(&logbuf);
527 0 : return NULL;
528 : }
529 :
530 : /* the OS details on cpu load are read from /proc/stat
531 : * We should use an OS define to react to the maximal cores
532 : */
533 : #define MAXCORES 256
534 : #define LASTCPU (MAXCORES - 1)
535 : static struct {
536 : lng user, nice, system, idle, iowait;
537 : double load;
538 : } corestat[MAXCORES];
539 :
540 : static int
541 10 : getCPULoad(char cpuload[BUFSIZ])
542 : {
543 10 : int cpu, len = 0, i;
544 10 : lng user, nice, system, idle, iowait;
545 10 : size_t n;
546 10 : char buf[512], *s;
547 10 : static FILE *proc = NULL;
548 10 : lng newload;
549 :
550 10 : if (proc == NULL) {
551 1 : proc = fopen("/proc/stat", "r");
552 1 : if (proc == NULL) {
553 : /* unexpected */
554 : return -1;
555 : }
556 : } else
557 9 : rewind(proc);
558 :
559 170 : while (fgets(buf, (int) sizeof(buf), proc) != NULL) {
560 160 : n = strlen(buf);
561 160 : if (strncmp(buf, "cpu", 3) == 0) {
562 90 : s = buf + 3;
563 90 : if (*s == ' ') {
564 : cpu = LASTCPU; // the cpu totals stored here
565 : } else {
566 80 : cpu = atoi(s);
567 80 : if (cpu < 0 || cpu > LASTCPU)
568 10 : cpu = LASTCPU;
569 : }
570 90 : s = strchr(s, ' ');
571 90 : if (s == NULL) /* unexpected format of file */
572 : break;
573 190 : while (*s && isspace((unsigned char) *s))
574 100 : s++;
575 90 : i = sscanf(s, LLSCN " " LLSCN " " LLSCN " " LLSCN " " LLSCN, &user,
576 : &nice, &system, &idle, &iowait);
577 90 : if (i == 5) {
578 90 : newload = (user - corestat[cpu].user + nice - corestat[cpu].nice +
579 90 : system - corestat[cpu].system);
580 90 : if (newload)
581 10 : corestat[cpu].load = (double) newload / (newload + idle -
582 10 : corestat[cpu].idle + iowait -
583 10 : corestat[cpu].iowait);
584 90 : corestat[cpu].user = user;
585 90 : corestat[cpu].nice = nice;
586 90 : corestat[cpu].system = system;
587 90 : corestat[cpu].idle = idle;
588 90 : corestat[cpu].iowait = iowait;
589 : }
590 : }
591 :
592 180 : while (buf[n - 1] != '\n') {
593 20 : if (fgets(buf, (int) sizeof(buf), proc) == NULL)
594 0 : goto exitloop;
595 20 : n = strlen(buf);
596 : }
597 : }
598 10 : exitloop:
599 :
600 10 : if (cpuload == NULL)
601 : return 0;
602 : // identify core processing
603 0 : len += snprintf(cpuload, BUFSIZ, "[");
604 0 : for (cpu = 0; cpuload && cpu < LASTCPU && corestat[cpu].user; cpu++) {
605 0 : len += snprintf(cpuload + len, BUFSIZ - len, "%s%.2f", (cpu ? "," : ""),
606 : corestat[cpu].load);
607 : }
608 0 : (void) snprintf(cpuload + len, BUFSIZ - len, "]");
609 0 : return 0;
610 : }
611 :
612 : void
613 0 : profilerHeartbeatEvent(char *alter)
614 : {
615 0 : char cpuload[BUFSIZ];
616 0 : struct logbuf logbuf;
617 0 : lng usec;
618 0 : uint64_t microseconds;
619 :
620 0 : if (ATOMIC_GET(&hbdelay) == 0 || maleventstream == 0)
621 0 : return;
622 0 : usec = GDKusec();
623 0 : microseconds = (uint64_t) startup_time.tv_sec * 1000000 +
624 0 : (uint64_t) startup_time.tv_usec + (uint64_t) usec;
625 :
626 : /* get CPU load on beat boundaries only */
627 0 : if (getCPULoad(cpuload))
628 : return;
629 :
630 0 : logbuf = (struct logbuf) { 0 };
631 :
632 0 : if (!logadd(&logbuf, "{")) // fill in later with the event counter
633 : return;
634 0 : if (!GDKinmemory(0) && !GDKembedded()) {
635 0 : char *uuid = NULL, *err;
636 0 : if ((err = msab_getUUID(&uuid)) == NULL) {
637 0 : bool ok = logadd(&logbuf, "\"session\":\"%s\",", uuid);
638 0 : free(uuid);
639 0 : if (!ok)
640 0 : return;
641 : } else
642 0 : free(err);
643 : }
644 0 : if (!logadd(&logbuf, "\"clk\":" LLFMT ",\"ctime\":%" PRIu64 ",\"rss\":%zu,",
645 0 : usec, microseconds, MT_getrss() / 1024 / 1024))
646 : return;
647 : #ifdef HAVE_SYS_RESOURCE_H
648 0 : getrusage(RUSAGE_SELF, &infoUsage);
649 0 : if (infoUsage.ru_inblock - prevUsage.ru_inblock
650 0 : && !logadd(&logbuf, "\"inblock\":%ld,",
651 : infoUsage.ru_inblock - prevUsage.ru_inblock))
652 : return;
653 0 : if (infoUsage.ru_oublock - prevUsage.ru_oublock
654 0 : && !logadd(&logbuf, "\"oublock\":%ld,",
655 : infoUsage.ru_oublock - prevUsage.ru_oublock))
656 : return;
657 0 : if (infoUsage.ru_majflt - prevUsage.ru_majflt
658 0 : && !logadd(&logbuf, "\"majflt\":%ld,",
659 : infoUsage.ru_majflt - prevUsage.ru_majflt))
660 : return;
661 0 : if (infoUsage.ru_nswap - prevUsage.ru_nswap
662 0 : && !logadd(&logbuf, "\"nswap\":%ld,",
663 : infoUsage.ru_nswap - prevUsage.ru_nswap))
664 : return;
665 0 : if (infoUsage.ru_nvcsw - prevUsage.ru_nvcsw
666 0 : && !logadd(&logbuf, "\"nvcsw\":%ld,",
667 0 : infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +
668 0 : infoUsage.ru_nivcsw - prevUsage.ru_nivcsw))
669 : return;
670 0 : prevUsage = infoUsage;
671 : #endif
672 0 : if (!logadd(&logbuf, "\"state\":\"%s\"," "\"cpuload\":%s" "}\n", // end marker
673 : alter, cpuload))
674 : return;
675 0 : logjsonInternal(logbuf.logbuffer, true);
676 0 : logdel(&logbuf);
677 : }
678 :
679 : void
680 45 : profilerEvent(MalEvent *me, NonMalEvent *nme)
681 : {
682 45 : str event = NULL;
683 45 : assert(me == NULL || nme == NULL);
684 : /* ignore profiler monitoring cmds */
685 45 : if (me != NULL && me->cntxt != NULL && getModuleId(me->pci) == myname)
686 : return;
687 :
688 44 : MT_lock_set(&mal_profileLock);
689 44 : if (maleventstream) {
690 0 : if (me != NULL && me->mb != NULL && nme == NULL) {
691 0 : if (me->stk == NULL ||
692 0 : me->pci == NULL ||
693 0 : (profilerMode && me->mb && getPC(me->mb, me->pci) != 0)) {
694 0 : MT_lock_unset(&mal_profileLock);
695 0 : return; /* minimal mode */
696 : }
697 0 : event = prepareMalEvent(me->cntxt, me->mb, me->stk, me->pci,
698 : me->clk, me->duration);
699 : }
700 0 : if (me == NULL && nme != NULL && nme->phase != MAL_ENGINE) {
701 0 : event = prepareNonMalEvent(nme->cntxt, nme->phase, nme->clk,
702 : nme->tid, nme->ts, nme->state,
703 : nme->duration);
704 : }
705 0 : if (event) {
706 0 : logjsonInternal(event, true);
707 0 : GDKfree(event);
708 : }
709 : }
710 44 : MT_lock_unset(&mal_profileLock);
711 : }
712 :
713 : /* The first scheme dumps the events on a stream (and in the pool)
714 : */
715 : str
716 0 : openProfilerStream(Client cntxt, int m)
717 : {
718 : #ifdef HAVE_SYS_RESOURCE_H
719 0 : getrusage(RUSAGE_SELF, &infoUsage);
720 0 : prevUsage = infoUsage;
721 : #endif
722 0 : MT_lock_set(&mal_profileLock);
723 0 : if (myname == 0) {
724 0 : myname = putName("profiler");
725 0 : logjsonInternal(monet_characteristics, true);
726 : }
727 0 : if (maleventstream) {
728 : /* The DBA can always grab the stream, others have to wait */
729 0 : if (cntxt->user == MAL_ADMIN) {
730 0 : closeProfilerStream(cntxt);
731 : } else {
732 0 : MT_lock_unset(&mal_profileLock);
733 0 : throw(MAL, "profiler.start",
734 : "Profiler already running, stream not available");
735 : }
736 : }
737 : /* 4 activates profiler in minimal mode. 1 and 3 were used in prev MonetDB versions */
738 : /* 0 activates profiler in detailed mode */
739 0 : switch (m) {
740 0 : case 0:
741 0 : profilerStatus = -1;
742 0 : break;
743 0 : case 4:
744 0 : profilerStatus = -1;
745 0 : profilerMode = 1;
746 0 : break;
747 0 : default:
748 0 : MT_lock_unset(&mal_profileLock);
749 0 : throw(MAL, "profiler.openstream", "Undefined profiler mode option");
750 : }
751 0 : maleventstream = cntxt->fdout;
752 0 : profilerUser = cntxt->user;
753 :
754 0 : MT_lock_unset(&mal_profileLock);
755 0 : return MAL_SUCCEED;
756 : }
757 :
758 : str
759 1 : closeProfilerStream(Client cntxt)
760 : {
761 1 : (void) cntxt;
762 1 : maleventstream = NULL;
763 1 : profilerStatus = 0;
764 1 : profilerMode = 0;
765 1 : profilerUser = 0;
766 1 : return MAL_SUCCEED;
767 : }
768 :
769 : /* the second scheme is to collect the profile
770 : * events in a local table for direct SQL inspection
771 : */
772 : str
773 1 : startProfiler(Client cntxt)
774 : {
775 : #ifdef HAVE_SYS_RESOURCE_H
776 1 : getrusage(RUSAGE_SELF, &infoUsage);
777 1 : prevUsage = infoUsage;
778 : #endif
779 1 : (void) cntxt;
780 :
781 1 : MT_lock_set(&mal_profileLock);
782 1 : if (maleventstream) {
783 0 : MT_lock_unset(&mal_profileLock);
784 0 : throw(MAL, "profiler.start",
785 : "Profiler already running, stream not available");
786 : }
787 1 : if (myname == 0) {
788 1 : myname = putName("profiler");
789 : }
790 1 : profilerStatus = 1;
791 1 : logjsonInternal(monet_characteristics, true);
792 1 : MT_lock_unset(&mal_profileLock);
793 : // reset the trace table
794 1 : clearTrace(cntxt);
795 :
796 1 : return MAL_SUCCEED;
797 : }
798 :
799 : /* SQL tracing is simplified, because it only collects the events in the temporary table.
800 : */
801 : str
802 22 : startTrace(Client cntxt)
803 : {
804 22 : cntxt->sqlprofiler = TRUE;
805 22 : clearTrace(cntxt);
806 22 : return MAL_SUCCEED;
807 : }
808 :
809 : str
810 26 : stopTrace(Client cntxt)
811 : {
812 26 : cntxt->sqlprofiler = FALSE;
813 26 : return MAL_SUCCEED;
814 : }
815 :
816 : str
817 340 : stopProfiler(Client cntxt)
818 : {
819 340 : MT_lock_set(&mal_profileLock);
820 340 : if (profilerStatus)
821 1 : profilerStatus = 0;
822 340 : setHeartbeat(0); // stop heartbeat
823 340 : if (cntxt)
824 1 : closeProfilerStream(cntxt);
825 340 : MT_lock_unset(&mal_profileLock);
826 340 : return MAL_SUCCEED;
827 : }
828 :
829 : /*
830 : * SQL profile traces
831 : * The events being captured are stored in client specific BATs.
832 : * They are made persistent to accumate information over
833 : * multiple sessions. This means it has to be explicitly reset
834 : * to avoid disc overflow using profiler.reset().
835 : *
836 : * The information returned for the trace is purposely limited
837 : * to the ticks and the final MAL instruction.
838 : * For more detailed analysis, the stethoscope should be used.
839 : */
840 :
841 : static void
842 14 : _cleanupProfiler(Client cntxt)
843 : {
844 14 : BBPreclaim(cntxt->profticks);
845 14 : BBPreclaim(cntxt->profstmt);
846 14 : BBPreclaim(cntxt->profevents);
847 14 : cntxt->profticks = cntxt->profstmt = cntxt->profevents = NULL;
848 14 : }
849 :
850 : static inline BAT *
851 105 : TRACEcreate(int tt)
852 : {
853 105 : return COLnew(0, tt, 1 << 10, TRANSIENT);
854 : }
855 :
856 : static void
857 78 : initTrace(Client cntxt)
858 : {
859 78 : MT_lock_set(&mal_profileLock);
860 78 : if (cntxt->profticks) {
861 43 : MT_lock_unset(&mal_profileLock);
862 43 : return; /* already initialized */
863 : }
864 35 : cntxt->profticks = TRACEcreate(TYPE_lng);
865 35 : cntxt->profstmt = TRACEcreate(TYPE_str);
866 35 : cntxt->profevents = TRACEcreate(TYPE_str);
867 35 : if (cntxt->profticks == NULL || cntxt->profstmt == NULL
868 35 : || cntxt->profevents == NULL)
869 0 : _cleanupProfiler(cntxt);
870 35 : MT_lock_unset(&mal_profileLock);
871 : }
872 :
873 : int
874 51 : TRACEtable(Client cntxt, BAT **r)
875 : {
876 51 : initTrace(cntxt);
877 51 : MT_lock_set(&mal_profileLock);
878 51 : if (cntxt->profticks == NULL) {
879 0 : MT_lock_unset(&mal_profileLock);
880 0 : return -1; /* not initialized */
881 : }
882 51 : r[0] = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT);
883 51 : r[1] = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT);
884 51 : r[2] = COLcopy(cntxt->profevents, cntxt->profevents->ttype, false,
885 : TRANSIENT);
886 51 : MT_lock_unset(&mal_profileLock);
887 51 : if (r[0] == NULL || r[1] == NULL || r[2] == NULL) {
888 0 : BBPreclaim(r[0]);
889 0 : BBPreclaim(r[1]);
890 0 : BBPreclaim(r[2]);
891 0 : return -1;
892 : }
893 : return 3;
894 : }
895 :
896 : BAT *
897 8 : getTrace(Client cntxt, const char *nme)
898 : {
899 8 : BAT *bn = NULL;
900 :
901 8 : MT_lock_set(&mal_profileLock);
902 8 : if (cntxt->profticks) {
903 8 : if (strcmp(nme, "usec") == 0) {
904 4 : bn = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false,
905 : TRANSIENT);
906 4 : } else if (strcmp(nme, "stmt") == 0) {
907 4 : bn = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false,
908 : TRANSIENT);
909 0 : } else if (strcmp(nme, "events") == 0) {
910 0 : bn = COLcopy(cntxt->profevents, cntxt->profevents->ttype, false,
911 : TRANSIENT);
912 : }
913 : }
914 8 : MT_lock_unset(&mal_profileLock);
915 8 : return bn;
916 : }
917 :
918 : void
919 27 : clearTrace(Client cntxt)
920 : {
921 27 : (void) cntxt;
922 27 : MT_lock_set(&mal_profileLock);
923 27 : if (cntxt->profticks == NULL) {
924 13 : MT_lock_unset(&mal_profileLock);
925 13 : initTrace(cntxt);
926 13 : return; /* not initialized */
927 : }
928 : /* drop all trace tables */
929 14 : _cleanupProfiler(cntxt);
930 14 : MT_lock_unset(&mal_profileLock);
931 14 : initTrace(cntxt);
932 : }
933 :
934 : str
935 0 : cleanupTraces(Client cntxt)
936 : {
937 0 : clearTrace(cntxt);
938 0 : return MAL_SUCCEED;
939 : }
940 :
941 : void
942 957 : sqlProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
943 : lng clk, lng ticks)
944 : {
945 957 : str stmt, c, ev;
946 957 : int errors = 0;
947 :
948 957 : if (cntxt->profticks == NULL)
949 : return;
950 :
951 : /* generate actual call statement */
952 957 : stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL | LIST_MAL_ALGO);
953 960 : c = stmt;
954 :
955 : /* unclear why we needed this. OLD?
956 : while (c && *c && (isspace((unsigned char)*c) || *c == '!'))
957 : c++;
958 : */
959 :
960 960 : ev = prepareMalEvent(cntxt, mb, stk, pci, clk, ticks);
961 : // keep it a short transaction
962 954 : MT_lock_set(&mal_profileLock);
963 966 : if (cntxt->profticks == NULL) {
964 0 : MT_lock_unset(&mal_profileLock);
965 0 : GDKfree(stmt);
966 0 : return;
967 : }
968 966 : errors += BUNappend(cntxt->profticks, &ticks, false) != GDK_SUCCEED;
969 966 : errors += BUNappend(cntxt->profstmt, c, false) != GDK_SUCCEED;
970 966 : errors += BUNappend(cntxt->profevents, ev ? ev : str_nil,
971 966 : false) != GDK_SUCCEED;
972 966 : if (errors > 0) {
973 : /* stop profiling if an error occurred */
974 0 : cntxt->sqlprofiler = FALSE;
975 : }
976 :
977 966 : MT_lock_unset(&mal_profileLock);
978 966 : GDKfree(stmt);
979 966 : GDKfree(ev);
980 : }
981 :
982 : lng
983 0 : getDiskWrites(void)
984 : {
985 : #ifdef HAVE_SYS_RESOURCE_H
986 0 : struct rusage infoUsage;
987 0 : getrusage(RUSAGE_SELF, &infoUsage);
988 0 : return infoUsage.ru_oublock;
989 : #else
990 : return 0;
991 : #endif
992 : }
993 :
994 : lng
995 0 : getDiskReads(void)
996 : {
997 : #ifdef HAVE_SYS_RESOURCE_H
998 0 : struct rusage infoUsage;
999 0 : getrusage(RUSAGE_SELF, &infoUsage);
1000 0 : return infoUsage.ru_inblock;
1001 : #else
1002 : return 0;
1003 : #endif
1004 : }
1005 :
1006 : lng
1007 0 : getUserTime(void)
1008 : {
1009 : #ifdef HAVE_TIMES
1010 0 : struct tms newTms;
1011 0 : times(&newTms);
1012 0 : return newTms.tms_utime;
1013 : #else
1014 : return 0;
1015 : #endif
1016 : }
1017 :
1018 : lng
1019 0 : getSystemTime(void)
1020 : {
1021 : #ifdef HAVE_TIMES
1022 0 : struct tms newTms;
1023 0 : times(&newTms);
1024 0 : return newTms.tms_stime;
1025 : #else
1026 : return 0;
1027 : #endif
1028 : }
1029 :
1030 : /* Calculate a pessimistic size of the disk storage */
1031 : lng
1032 0 : getDiskSpace(void)
1033 : {
1034 0 : BAT *b;
1035 0 : bat i;
1036 0 : lng size = 0;
1037 :
1038 0 : for (i = 1; i < getBBPsize(); i++)
1039 0 : if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))) {
1040 0 : b = BATdescriptor(i);
1041 0 : if (b) {
1042 0 : size += sizeof(BAT);
1043 :
1044 0 : MT_lock_set(&b->theaplock);
1045 0 : if (!isVIEW(b)) {
1046 0 : BUN cnt = BATcount(b);
1047 :
1048 : /* the upperbound is used for the heaps */
1049 0 : if (b->tvheap)
1050 0 : size += HEAPvmsize(b->tvheap);
1051 0 : MT_lock_unset(&b->theaplock);
1052 :
1053 0 : size += tailsize(b, cnt);
1054 0 : if (b->thash)
1055 0 : size += sizeof(BUN) * cnt;
1056 : /* also add the size of an imprint, ordered index or mosaic */
1057 0 : if (b->timprints)
1058 0 : size += IMPSimprintsize(b);
1059 0 : if (b->torderidx)
1060 0 : size += HEAPvmsize(b->torderidx);
1061 : } else {
1062 0 : MT_lock_unset(&b->theaplock);
1063 : }
1064 0 : BBPunfix(i);
1065 : }
1066 : }
1067 0 : return size;
1068 : }
1069 :
1070 :
1071 : void
1072 10 : profilerGetCPUStat(lng *user, lng *nice, lng *sys, lng *idle, lng *iowait)
1073 : {
1074 10 : (void) getCPULoad(NULL);
1075 10 : *user = corestat[LASTCPU].user;
1076 10 : *nice = corestat[LASTCPU].nice;
1077 10 : *sys = corestat[LASTCPU].system;
1078 10 : *idle = corestat[LASTCPU].idle;
1079 10 : *iowait = corestat[LASTCPU].iowait;
1080 10 : }
1081 :
1082 : /* the heartbeat process produces a ping event once every X milliseconds */
1083 : static MT_Id hbthread;
1084 : static ATOMIC_TYPE hbrunning = ATOMIC_VAR_INIT(0);
1085 :
1086 : static void
1087 340 : profilerHeartbeat(void *dummy)
1088 : {
1089 340 : int t;
1090 352 : const int timeout = ATOMIC_GET(&GDKdebug) & FORCEMITOMASK ? 10 : 25;
1091 :
1092 340 : (void) dummy;
1093 340 : for (;;) {
1094 : /* wait until you need this info */
1095 340 : MT_thread_setworking("sleeping");
1096 72958 : while (ATOMIC_GET(&hbdelay) == 0 || maleventstream == NULL) {
1097 72958 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1098 339 : return;
1099 72619 : MT_sleep_ms(timeout);
1100 : }
1101 0 : for (t = (int) ATOMIC_GET(&hbdelay); t > 0; t -= timeout) {
1102 0 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1103 0 : return;
1104 0 : MT_sleep_ms(t > timeout ? timeout : t);
1105 : }
1106 0 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1107 0 : return;
1108 0 : MT_thread_setworking("pinging");
1109 0 : profilerHeartbeatEvent("ping");
1110 : }
1111 : }
1112 :
1113 : void
1114 679 : setHeartbeat(int delay)
1115 : {
1116 679 : if (delay < 0) {
1117 339 : ATOMIC_SET(&hbrunning, 0);
1118 339 : if (hbthread)
1119 339 : MT_join_thread(hbthread);
1120 339 : return;
1121 : }
1122 340 : if (delay > 0 && delay <= 10)
1123 0 : delay = 10;
1124 340 : ATOMIC_SET(&hbdelay, delay);
1125 : }
1126 :
1127 : /* TODO getprofilerlimit and setprofilerlimit functions */
1128 :
1129 : int
1130 2 : getprofilerlimit(void)
1131 : {
1132 2 : return 0;
1133 : }
1134 :
1135 : void
1136 0 : setprofilerlimit(int limit)
1137 : {
1138 0 : (void) limit;
1139 0 : }
1140 :
1141 : void
1142 340 : initProfiler(void)
1143 : {
1144 340 : gettimeofday(&startup_time, NULL);
1145 340 : }
1146 :
1147 : void
1148 340 : initHeartbeat(void)
1149 : {
1150 340 : ATOMIC_SET(&hbrunning, 1);
1151 340 : if (MT_create_thread(&hbthread, profilerHeartbeat, NULL, MT_THR_JOINABLE,
1152 : "heartbeat") < 0) {
1153 : /* it didn't happen */
1154 0 : hbthread = 0;
1155 0 : ATOMIC_SET(&hbrunning, 0);
1156 : }
1157 340 : }
|