Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /* (c) M.L. Kersten
14 : * Performance tracing
15 : * The stethoscope/tachograph and tomograph performance monitors have exclusive
16 : * access to a single event stream, which avoids concurrency conflicts amongst
17 : * clients.
18 : * It also avoid cluthered event records on the stream. Since this event stream
19 : * is owned by a client, we should ensure that the profiler is automatically
20 : * reset once the owner leaves.
21 : */
22 : #include "monetdb_config.h"
23 : #include "mutils.h" /* mercurial_revision */
24 : #include "msabaoth.h" /* msab_getUUID */
25 : #include "mal_authorize.h"
26 : #include "mal_function.h"
27 : #include "mal_listing.h"
28 : #include "mal_profiler.h"
29 : #include "mal_runtime.h"
30 : #include "mal_utils.h"
31 : #include "mal_resource.h"
32 : #include "mal_internal.h"
33 :
34 : #ifdef HAVE_SYS_TIME_H
35 : #include <sys/time.h>
36 : #endif
37 :
38 : #include <string.h>
39 :
40 : static const char *myname = 0; // avoid tracing the profiler module
41 :
42 : /* The JSON rendering can be either using '\n' separators between
43 : * each key:value pair or as a single line.
44 : * The current stethoscope implementation requires the first option and
45 : * also the term rendering to be set to ''
46 : */
47 :
48 : int profilerStatus = 0; /* global flag profiler status */
49 : int profilerMode = 0; /* global flag profiler mode, minimal or detailed */
50 : static oid profilerUser; /* keep track on who has claimed the channel */
51 :
52 : static struct timeval startup_time;
53 :
54 : static ATOMIC_TYPE hbdelay = ATOMIC_VAR_INIT(0);
55 :
56 : #ifdef HAVE_SYS_RESOURCE_H
57 : #include <sys/resource.h>
58 : struct rusage infoUsage;
59 : static struct rusage prevUsage;
60 : #endif
61 :
62 : #define LOGLEN 8192
63 :
64 : static void
65 2 : logjsonInternal(char *logbuffer, bool flush)
66 : {
67 2 : size_t len;
68 2 : len = strlen(logbuffer);
69 :
70 2 : (void) mnstr_write(maleventstream, logbuffer, 1, len);
71 2 : if (flush)
72 1 : (void) mnstr_flush(maleventstream, MNSTR_FLUSH_DATA);
73 2 : }
74 :
75 : /*
76 : * We use a buffer (`logbuffer`) where we incrementally create the output JSON
77 : * object. Initially we allocate LOGLEN (8K)
78 : * bytes and we keep the capacity of the buffer (`logcap`) and the length of the
79 : * current string (`loglen`).
80 : *
81 : * We use the `logadd` function to add data to our buffer (usually key-value
82 : * pairs). This macro offers an interface similar to printf.
83 : *
84 : * The first snprintf below happens in a statically allocated buffer that might
85 : * be much smaller than logcap. We do not care. We only need to perform this
86 : * snprintf to get the actual length of the string that is to be produced.
87 : *
88 : * There are three cases:
89 : *
90 : * 1. The new string fits in the current buffer -> we just update the buffer
91 : *
92 : * 2. The new string does not fit in the current buffer, but is smaller than the
93 : * capacity of the buffer -> we output the current contents of the buffer and
94 : * start at the beginning.
95 : *
96 : * 3. The new string exceeds the current capacity of the buffer -> we output the
97 : * current contents and reallocate the buffer. The new capacity is 1.5 times the
98 : * length of the new string.
99 : */
100 : struct logbuf {
101 : char *logbuffer;
102 : char *logbase;
103 : size_t loglen;
104 : size_t logcap;
105 : };
106 :
107 : static inline void
108 859 : lognew(struct logbuf *logbuf)
109 : {
110 859 : logbuf->loglen = 0;
111 859 : logbuf->logbase = logbuf->logbuffer;
112 859 : *logbuf->logbase = 0;
113 859 : }
114 :
115 : static inline void
116 0 : logdel(struct logbuf *logbuf)
117 : {
118 0 : GDKfree(logbuf->logbuffer);
119 0 : logbuf->logbuffer = NULL;
120 0 : }
121 :
122 : static bool logadd(struct logbuf *logbuf,
123 : _In_z_ _Printf_format_string_ const char *fmt, ...)
124 : __attribute__((__format__(__printf__, 2, 3)))
125 : __attribute__((__warn_unused_result__));
126 : static bool
127 33852 : logadd(struct logbuf *logbuf, const char *fmt, ...)
128 : {
129 33852 : char tmp_buff[LOGLEN];
130 33852 : int tmp_len;
131 33852 : va_list va;
132 :
133 33852 : va_start(va, fmt);
134 33852 : tmp_len = vsnprintf(tmp_buff, sizeof(tmp_buff), fmt, va);
135 33852 : va_end(va);
136 33852 : if (tmp_len < 0) {
137 0 : logdel(logbuf);
138 0 : return false;
139 : }
140 33852 : if (logbuf->loglen + (size_t) tmp_len >= logbuf->logcap) {
141 859 : if ((size_t) tmp_len >= logbuf->logcap) {
142 : /* includes first time when logbuffer == NULL and logcap == 0 */
143 858 : char *alloc_buff;
144 858 : if (logbuf->loglen > 0)
145 0 : logjsonInternal(logbuf->logbuffer, false);
146 858 : logbuf->logcap = (size_t) tmp_len + (size_t) tmp_len / 2;
147 858 : if (logbuf->logcap < LOGLEN)
148 858 : logbuf->logcap = LOGLEN;
149 858 : alloc_buff = GDKrealloc(logbuf->logbuffer, logbuf->logcap);
150 858 : if (alloc_buff == NULL) {
151 0 : TRC_ERROR(MAL_SERVER,
152 : "Profiler JSON buffer reallocation failure\n");
153 0 : logdel(logbuf);
154 0 : return false;
155 : }
156 858 : logbuf->logbuffer = alloc_buff;
157 858 : lognew(logbuf);
158 : } else {
159 1 : logjsonInternal(logbuf->logbuffer, false);
160 1 : lognew(logbuf);
161 : }
162 : }
163 33852 : if (tmp_len > 0) {
164 33852 : va_start(va, fmt);
165 33852 : logbuf->loglen += vsnprintf(logbuf->logbase + logbuf->loglen,
166 33852 : logbuf->logcap - logbuf->loglen, fmt, va);
167 33852 : va_end(va);
168 : }
169 : return true;
170 : }
171 :
172 : static str phase_descriptions[] = {
173 : [CLIENT_START] = "session_start",
174 : [CLIENT_END] = "session_end",
175 : [TEXT_TO_SQL] = "text_to_sql",
176 : [SQL_TO_REL] = "sql_to_rel",
177 : [REL_OPT] = "rel_opt",
178 : [REL_TO_MAL] = "rel_to_mal",
179 : [MAL_OPT] = "mal_opt",
180 : [MAL_ENGINE] = "mal_engine",
181 : [COMMIT] = "trans_commit",
182 : [ROLLBACK] = "trans_rollback",
183 : [CONFLICT] = "trans_conflict"
184 : };
185 :
186 : static str
187 0 : prepareNonMalEvent(Client cntxt, enum event_phase phase, ulng clk, ulng *tstart,
188 : ulng *tend, int state, ulng duration)
189 : {
190 0 : oid *tag = NULL;
191 0 : str query = NULL;
192 0 : struct logbuf logbuf = { 0 };
193 :
194 0 : uint64_t mclk = (uint64_t) clk -
195 0 : ((uint64_t) startup_time.tv_sec * 1000000 -
196 0 : (uint64_t) startup_time.tv_usec);
197 :
198 0 : assert(cntxt);
199 0 : int sessionid = cntxt->idx;
200 0 : if (cntxt->curprg)
201 0 : tag = &cntxt->curprg->def->tag;
202 0 : if (cntxt->query
203 0 : && (query = mal_quote(cntxt->query, strlen(cntxt->query))) == NULL)
204 : return NULL;
205 :
206 0 : if (!logadd(&logbuf, "{\"sessionid\":\"%d\"", sessionid))
207 0 : goto cleanup_and_exit;
208 0 : if (!logadd(&logbuf, ", \"clk\":" ULLFMT "", mclk))
209 0 : goto cleanup_and_exit;
210 0 : if (!logadd(&logbuf, ", \"thread\":%zu, \"phase\":\"%s\"",
211 : MT_getpid(), phase_descriptions[phase]))
212 0 : goto cleanup_and_exit;
213 0 : if (tstart && !logadd(&logbuf, ", \"tstart\":" ULLFMT, *tstart))
214 0 : goto cleanup_and_exit;
215 0 : if (tend && !logadd(&logbuf, ", \"tend\":" ULLFMT, *tend))
216 0 : goto cleanup_and_exit;
217 0 : if (tag && !logadd(&logbuf, ", \"tag\":" OIDFMT, *tag))
218 0 : goto cleanup_and_exit;
219 0 : if (query && phase == TEXT_TO_SQL
220 0 : && !logadd(&logbuf, ", \"query\":\"%s\"", query))
221 0 : goto cleanup_and_exit;
222 0 : if (state != 0 && !logadd(&logbuf, ", \"state\":\"error\""))
223 0 : goto cleanup_and_exit;
224 0 : if (!logadd(&logbuf, ", \"usec\":" ULLFMT "}\n", duration))
225 0 : goto cleanup_and_exit;
226 0 : GDKfree(query);
227 0 : return logbuf.logbuffer;
228 0 : cleanup_and_exit:
229 0 : GDKfree(query);
230 0 : logdel(&logbuf);
231 0 : return NULL;
232 : }
233 :
234 : static inline str
235 1896 : format_val2json(const ValPtr res)
236 : {
237 1896 : char *buf = NULL;
238 1896 : size_t sz = 0;
239 :
240 3789 : if (BATatoms[res->vtype].atomNull &&
241 1893 : (!VALget(res)
242 1893 : || BATatoms[res->vtype].atomCmp(VALget(res),
243 : BATatoms[res->vtype].atomNull) == 0))
244 241 : return GDKstrdup("\"nil\"");
245 :
246 1652 : bool use_external = true;
247 :
248 1652 : switch (res->vtype) {
249 437 : case TYPE_bte:
250 : case TYPE_sht:
251 : case TYPE_int:
252 : case TYPE_flt:
253 : case TYPE_dbl:
254 : case TYPE_lng:
255 : #ifdef HAVE_HGE
256 : case TYPE_hge:
257 : #endif
258 437 : use_external = false;
259 : }
260 :
261 1652 : if ((*BATatoms[res->vtype].atomToStr) (&buf, &sz, VALptr(res),
262 : use_external) < 0)
263 : return NULL;
264 :
265 1652 : if (!use_external || res->vtype == TYPE_str)
266 808 : return buf;
267 :
268 844 : ValRecord val;
269 844 : if (VALinit(&val, TYPE_str, buf) == NULL) {
270 0 : GDKfree(buf);
271 0 : return NULL;
272 : }
273 :
274 844 : GDKfree(buf);
275 :
276 844 : char *buf2;
277 844 : buf2 = VALformat(&val);
278 844 : VALclear(&val);
279 :
280 844 : return buf2;
281 : }
282 :
283 : static str
284 857 : prepareMalEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
285 : lng clk, lng ticks)
286 : {
287 857 : struct logbuf logbuf;
288 857 : str c;
289 857 : uint64_t mclk;
290 857 : bool ok;
291 857 : const char *algo = MT_thread_getalgorithm();
292 :
293 : /* The stream of events can be complete read by the DBA,
294 : * all other users can only see events assigned to their account
295 : */
296 857 : if (profilerUser != MAL_ADMIN && profilerUser != cntxt->user)
297 : return NULL;
298 :
299 : /* align the variable namings with EXPLAIN and TRACE */
300 857 : if (pci->pc == 1)
301 40 : renameVariables(mb);
302 :
303 857 : logbuf = (struct logbuf) { 0 };
304 :
305 857 : mclk = (uint64_t) clk - ((uint64_t) startup_time.tv_sec * 1000000 -
306 857 : (uint64_t) startup_time.tv_usec);
307 : /* make profile event tuple */
308 1715 : if (!logadd(&logbuf, "{" // fill in later with the event counter
309 : "\"sessionid\":\"%d\""
310 : ",\"clk\":%" PRIu64 ""
311 : ",\"thread\":%zu"
312 : ",\"phase\":\"%s\""
313 : ",\"pc\":%d"
314 : ",\"tag\":" OIDFMT,
315 : cntxt->idx,
316 : mclk,
317 : MT_getpid(),
318 : phase_descriptions[MAL_ENGINE],
319 857 : mb ? getPC(mb, pci) : 0, stk ? stk->tag : 0))
320 0 : goto cleanup_and_exit;
321 858 : if (pci->modname
322 822 : && !logadd(&logbuf, ",\"module\":\"%s\"",
323 : pci->modname ? pci->modname : ""))
324 0 : goto cleanup_and_exit;
325 858 : if (pci->fcnname
326 822 : && !logadd(&logbuf, ",\"function\":\"%s\"",
327 : pci->fcnname ? pci->fcnname : ""))
328 0 : goto cleanup_and_exit;
329 858 : if (pci->barrier
330 4 : && !logadd(&logbuf, ",\"barrier\":\"%s\"", operatorName(pci->barrier)))
331 0 : goto cleanup_and_exit;
332 930 : if ((pci->token < FCNcall || pci->token > PATcall) &&
333 72 : !logadd(&logbuf, ",\"operator\":\"%s\"", operatorName(pci->token)))
334 0 : goto cleanup_and_exit;
335 858 : if (!logadd(&logbuf, ",\"usec\":" LLFMT, ticks))
336 0 : goto cleanup_and_exit;
337 858 : if (algo && !logadd(&logbuf, ",\"algorithm\":\"%s\"", algo))
338 0 : goto cleanup_and_exit;
339 858 : if (mb && pci->modname && pci->fcnname) {
340 822 : int j;
341 :
342 822 : if (profilerMode == 0 && stk) {
343 822 : if (!logadd(&logbuf, ",\"args\":["))
344 0 : goto cleanup_and_exit;
345 4264 : for (j = 0; j < pci->argc; j++) {
346 3442 : int tpe = getVarType(mb, getArg(pci, j));
347 3442 : str tname = 0, cv;
348 3442 : lng total = 0;
349 3442 : BUN cnt = 0;
350 3442 : bat bid = 0;
351 :
352 3442 : if (j == 0) {
353 : // No comma at the beginning
354 822 : if (!logadd(&logbuf, "{"))
355 0 : goto cleanup_and_exit;
356 : } else {
357 2620 : if (!logadd(&logbuf, ",{"))
358 0 : goto cleanup_and_exit;
359 : }
360 6884 : if (!logadd(&logbuf, "\"%s\":%d,\"var\":\"%s\"",
361 3442 : j < pci->retc ? "ret" : "arg", j,
362 : getVarName(mb, getArg(pci, j))))
363 0 : goto cleanup_and_exit;
364 3442 : c = getVarName(mb, getArg(pci, j));
365 3441 : if (getVarSTC(mb, getArg(pci, j))) {
366 0 : InstrPtr stc = getInstrPtr(mb, getVarSTC(mb, getArg(pci, j)));
367 0 : if (stc && getModuleId(stc)
368 0 : && strcmp(getModuleId(stc), "sql") == 0
369 0 : && strncmp(getFunctionId(stc), "bind", 4) == 0
370 0 : && !logadd(&logbuf, ",\"alias\":\"%s.%s.%s\"",
371 0 : getVarConstant(mb,
372 : getArg(stc,
373 : stc->retc +
374 : 1)).val.sval,
375 0 : getVarConstant(mb,
376 : getArg(stc,
377 : stc->retc +
378 : 2)).val.sval,
379 0 : getVarConstant(mb,
380 : getArg(stc,
381 : stc->retc +
382 : 3)).val.sval))
383 0 : goto cleanup_and_exit;
384 : }
385 3441 : if (isaBatType(tpe)) {
386 1548 : BAT *d = BATdescriptor(bid = stk->stk[getArg(pci, j)].val.bval);
387 1548 : tname = getTypeName(getBatType(tpe));
388 1548 : ok = logadd(&logbuf, ",\"type\":\"bat[:%s]\"", tname);
389 1549 : GDKfree(tname);
390 1549 : if (!ok) {
391 0 : BBPreclaim(d);
392 0 : goto cleanup_and_exit;
393 : }
394 1549 : if (d) {
395 1545 : MT_lock_set(&d->theaplock);
396 1545 : BATiter di = bat_iterator_nolock(d);
397 : /* outside the lock we cannot dereference di.h or di.vh,
398 : * but we can use all values without dereference and
399 : * without further locking */
400 1545 : MT_lock_unset(&d->theaplock);
401 1545 : cnt = di.count;
402 1545 : if (isVIEW(d)) {
403 204 : BAT *v = BBP_desc(VIEWtparent(d));
404 204 : bool vtransient = true;
405 204 : if (v) {
406 188 : MT_lock_set(&v->theaplock);
407 188 : vtransient = v->batTransient;
408 188 : MT_lock_unset(&v->theaplock);
409 : }
410 392 : if (!logadd(&logbuf,
411 : ",\"view\":\"true\""
412 : ",\"parent\":%d"
413 : ",\"seqbase\":" BUNFMT
414 : ",\"mode\":\"%s\"",
415 204 : VIEWtparent(d),
416 : d->hseqbase,
417 : vtransient ? "transient" :
418 : "persistent")) {
419 0 : BBPunfix(d->batCacheid);
420 0 : goto cleanup_and_exit;
421 : }
422 : } else {
423 1341 : if (!logadd(&logbuf, ",\"mode\":\"%s\"",
424 1341 : (di.transient ? "transient" : "persistent"))) {
425 0 : BBPunfix(d->batCacheid);
426 0 : goto cleanup_and_exit;
427 : }
428 : }
429 1545 : if (!logadd(&logbuf,
430 : ",\"sorted\":%d"
431 : ",\"revsorted\":%d"
432 : ",\"nonil\":%d"
433 : ",\"nil\":%d"
434 : ",\"key\":%d",
435 1545 : di.sorted,
436 1545 : di.revsorted, di.nonil, di.nil, di.key)) {
437 0 : BBPunfix(d->batCacheid);
438 0 : goto cleanup_and_exit;
439 : }
440 1716 : if ((di.minpos != BUN_NONE &&
441 171 : !logadd(&logbuf, ",\"minpos\":\"" BUNFMT "\"",
442 1545 : di.minpos)) || (di.maxpos != BUN_NONE
443 172 : && !logadd(&logbuf,
444 : ",\"maxpos\":\""
445 : BUNFMT "\"",
446 : di.maxpos))
447 1545 : || (di.unique_est != 0
448 397 : && !logadd(&logbuf, ",\"nestimate\":\"%g\"",
449 : di.unique_est))) {
450 0 : BBPunfix(d->batCacheid);
451 0 : goto cleanup_and_exit;
452 : }
453 :
454 1545 : cv = VALformat(&stk->stk[getArg(pci, j)]);
455 1545 : if (cv) {
456 1545 : c = strchr(cv, '>');
457 1545 : if (c) /* unlikely that this isn't true */
458 1545 : *c = 0;
459 1545 : ok = logadd(&logbuf, ",\"file\":\"%s\"", cv + 1);
460 1545 : GDKfree(cv);
461 1545 : if (!ok) {
462 0 : BBPunfix(d->batCacheid);
463 0 : goto cleanup_and_exit;
464 : }
465 : }
466 1545 : total += cnt << di.shift;
467 1545 : if (!logadd(&logbuf, ",\"width\":%d", di.width)) {
468 0 : BBPunfix(d->batCacheid);
469 0 : goto cleanup_and_exit;
470 : }
471 : /* keeping information about the individual auxiliary heaps is helpful during analysis. */
472 1545 : MT_rwlock_rdlock(&d->thashlock);
473 1545 : if (d->thash
474 0 : && !logadd(&logbuf, ",\"hash\":" LLFMT,
475 0 : (lng) hashinfo(d->thash,
476 : d->batCacheid))) {
477 0 : MT_rwlock_rdunlock(&d->thashlock);
478 0 : BBPunfix(d->batCacheid);
479 0 : goto cleanup_and_exit;
480 : }
481 1545 : MT_rwlock_rdunlock(&d->thashlock);
482 1545 : if (di.vh
483 227 : && !logadd(&logbuf, ",\"vheap\":" BUNFMT,
484 : di.vhfree)) {
485 0 : BBPunfix(d->batCacheid);
486 0 : goto cleanup_and_exit;
487 : }
488 1545 : if (d->timprints
489 0 : && !logadd(&logbuf, ",\"imprints\":" LLFMT,
490 0 : (lng) IMPSimprintsize(d))) {
491 0 : BBPunfix(d->batCacheid);
492 0 : goto cleanup_and_exit;
493 : }
494 : /* if (!logadd(&logbuf, "\"debug\":\"%s\",", d->debugmessages)) goto cleanup_and_exit; */
495 1545 : BBPunfix(d->batCacheid);
496 : }
497 1549 : if (!logadd(&logbuf,
498 : ",\"bid\":%d"
499 : ",\"count\":" BUNFMT
500 : ",\"size\":" LLFMT, bid, cnt, total))
501 0 : goto cleanup_and_exit;
502 : } else {
503 1893 : tname = getTypeName(tpe);
504 3786 : ok = logadd(&logbuf,
505 : ",\"type\":\"%s\""
506 : ",\"const\":%d",
507 1893 : tname, isVarConstant(mb, getArg(pci, j)));
508 1893 : GDKfree(tname);
509 1893 : if (!ok)
510 0 : goto cleanup_and_exit;
511 1893 : cv = format_val2json(&stk->stk[getArg(pci, j)]);
512 1893 : if (cv)
513 1893 : ok = logadd(&logbuf, ",\"value\":%s", cv);
514 3786 : GDKfree(cv);
515 1893 : if (!ok)
516 0 : goto cleanup_and_exit;
517 : }
518 3442 : if (!logadd(&logbuf, ",\"eol\":%d", getVarEolife(mb, getArg(pci, j))))
519 0 : goto cleanup_and_exit;
520 : // if (!logadd(&logbuf, ",\"fixed\":%d", isVarFixed(mb,getArg(pci,j)))) return NULL;
521 3442 : if (!logadd(&logbuf, "}"))
522 0 : goto cleanup_and_exit;
523 : }
524 822 : if (!logadd(&logbuf, "]")) // end marker for arguments
525 0 : goto cleanup_and_exit;
526 : }
527 : }
528 858 : if (!logadd(&logbuf, "}\n")) // end marker
529 0 : goto cleanup_and_exit;
530 858 : return logbuf.logbuffer;
531 0 : cleanup_and_exit:
532 0 : logdel(&logbuf);
533 0 : return NULL;
534 : }
535 :
536 : /* the OS details on cpu load are read from /proc/stat
537 : * We should use an OS define to react to the maximal cores
538 : */
539 : #define MAXCORES 256
540 : #define LASTCPU (MAXCORES - 1)
541 : static struct {
542 : lng user, nice, system, idle, iowait;
543 : double load;
544 : } corestat[MAXCORES];
545 :
546 : static int
547 10 : getCPULoad(char cpuload[BUFSIZ])
548 : {
549 10 : int cpu, len = 0, i;
550 10 : lng user, nice, system, idle, iowait;
551 10 : size_t n;
552 10 : char buf[512], *s;
553 10 : static FILE *proc = NULL;
554 10 : lng newload;
555 :
556 10 : if (proc == NULL) {
557 1 : proc = fopen("/proc/stat", "r");
558 1 : if (proc == NULL) {
559 : /* unexpected */
560 : return -1;
561 : }
562 : } else
563 9 : rewind(proc);
564 :
565 130 : while (fgets(buf, (int) sizeof(buf), proc) != NULL) {
566 120 : n = strlen(buf);
567 120 : if (strncmp(buf, "cpu", 3) == 0) {
568 50 : s = buf + 3;
569 50 : if (*s == ' ') {
570 : cpu = LASTCPU; // the cpu totals stored here
571 : } else {
572 40 : cpu = atoi(s);
573 40 : if (cpu < 0 || cpu > LASTCPU)
574 10 : cpu = LASTCPU;
575 : }
576 50 : s = strchr(s, ' ');
577 50 : if (s == NULL) /* unexpected format of file */
578 : break;
579 110 : while (*s && isspace((unsigned char) *s))
580 60 : s++;
581 50 : i = sscanf(s, LLSCN " " LLSCN " " LLSCN " " LLSCN " " LLSCN, &user,
582 : &nice, &system, &idle, &iowait);
583 50 : if (i == 5) {
584 50 : newload = (user - corestat[cpu].user + nice - corestat[cpu].nice +
585 50 : system - corestat[cpu].system);
586 50 : if (newload)
587 8 : corestat[cpu].load = (double) newload / (newload + idle -
588 8 : corestat[cpu].idle + iowait -
589 8 : corestat[cpu].iowait);
590 50 : corestat[cpu].user = user;
591 50 : corestat[cpu].nice = nice;
592 50 : corestat[cpu].system = system;
593 50 : corestat[cpu].idle = idle;
594 50 : corestat[cpu].iowait = iowait;
595 : }
596 : }
597 :
598 140 : while (buf[n - 1] != '\n') {
599 20 : if (fgets(buf, (int) sizeof(buf), proc) == NULL)
600 0 : goto exitloop;
601 20 : n = strlen(buf);
602 : }
603 : }
604 10 : exitloop:
605 :
606 10 : if (cpuload == NULL)
607 : return 0;
608 : // identify core processing
609 0 : len += snprintf(cpuload, BUFSIZ, "[");
610 0 : for (cpu = 0; cpuload && cpu < LASTCPU && corestat[cpu].user; cpu++) {
611 0 : len += snprintf(cpuload + len, BUFSIZ - len, "%s%.2f", (cpu ? "," : ""),
612 : corestat[cpu].load);
613 : }
614 0 : (void) snprintf(cpuload + len, BUFSIZ - len, "]");
615 0 : return 0;
616 : }
617 :
618 : void
619 0 : profilerHeartbeatEvent(char *alter)
620 : {
621 0 : char cpuload[BUFSIZ];
622 0 : struct logbuf logbuf;
623 0 : lng usec;
624 0 : uint64_t microseconds;
625 :
626 0 : if (ATOMIC_GET(&hbdelay) == 0 || maleventstream == 0)
627 0 : return;
628 0 : usec = GDKusec();
629 0 : microseconds = (uint64_t) startup_time.tv_sec * 1000000 +
630 0 : (uint64_t) startup_time.tv_usec + (uint64_t) usec;
631 :
632 : /* get CPU load on beat boundaries only */
633 0 : if (getCPULoad(cpuload))
634 : return;
635 :
636 0 : logbuf = (struct logbuf) { 0 };
637 :
638 0 : if (!logadd(&logbuf, "{")) // fill in later with the event counter
639 : return;
640 0 : if (!GDKinmemory(0) && !GDKembedded()) {
641 0 : char *uuid = NULL, *err;
642 0 : if ((err = msab_getUUID(&uuid)) == NULL) {
643 0 : bool ok = logadd(&logbuf, "\"session\":\"%s\",", uuid);
644 0 : free(uuid);
645 0 : if (!ok)
646 0 : return;
647 : } else
648 0 : free(err);
649 : }
650 0 : if (!logadd(&logbuf, "\"clk\":" LLFMT ",\"ctime\":%" PRIu64 ",\"rss\":%zu,",
651 0 : usec, microseconds, MT_getrss() / 1024 / 1024))
652 : return;
653 : #ifdef HAVE_SYS_RESOURCE_H
654 0 : getrusage(RUSAGE_SELF, &infoUsage);
655 0 : if (infoUsage.ru_inblock - prevUsage.ru_inblock
656 0 : && !logadd(&logbuf, "\"inblock\":%ld,",
657 : infoUsage.ru_inblock - prevUsage.ru_inblock))
658 : return;
659 0 : if (infoUsage.ru_oublock - prevUsage.ru_oublock
660 0 : && !logadd(&logbuf, "\"oublock\":%ld,",
661 : infoUsage.ru_oublock - prevUsage.ru_oublock))
662 : return;
663 0 : if (infoUsage.ru_majflt - prevUsage.ru_majflt
664 0 : && !logadd(&logbuf, "\"majflt\":%ld,",
665 : infoUsage.ru_majflt - prevUsage.ru_majflt))
666 : return;
667 0 : if (infoUsage.ru_nswap - prevUsage.ru_nswap
668 0 : && !logadd(&logbuf, "\"nswap\":%ld,",
669 : infoUsage.ru_nswap - prevUsage.ru_nswap))
670 : return;
671 0 : if (infoUsage.ru_nvcsw - prevUsage.ru_nvcsw
672 0 : && !logadd(&logbuf, "\"nvcsw\":%ld,",
673 0 : infoUsage.ru_nvcsw - prevUsage.ru_nvcsw +
674 0 : infoUsage.ru_nivcsw - prevUsage.ru_nivcsw))
675 : return;
676 0 : prevUsage = infoUsage;
677 : #endif
678 0 : if (!logadd(&logbuf, "\"state\":\"%s\"," "\"cpuload\":%s" "}\n", // end marker
679 : alter, cpuload))
680 : return;
681 0 : logjsonInternal(logbuf.logbuffer, true);
682 0 : logdel(&logbuf);
683 : }
684 :
685 : void
686 45 : profilerEvent(MalEvent *me, NonMalEvent *nme)
687 : {
688 45 : str event = NULL;
689 45 : assert(me == NULL || nme == NULL);
690 : /* ignore profiler monitoring cmds */
691 45 : if (me != NULL && me->cntxt != NULL && getModuleId(me->pci) == myname)
692 : return;
693 :
694 44 : MT_lock_set(&mal_profileLock);
695 44 : if (maleventstream) {
696 0 : if (me != NULL && me->mb != NULL && nme == NULL) {
697 0 : if (me->stk == NULL ||
698 0 : me->pci == NULL ||
699 0 : (profilerMode && me->mb && getPC(me->mb, me->pci) != 0)) {
700 0 : MT_lock_unset(&mal_profileLock);
701 0 : return; /* minimal mode */
702 : }
703 0 : event = prepareMalEvent(me->cntxt, me->mb, me->stk, me->pci,
704 : me->clk, me->duration);
705 : }
706 0 : if (me == NULL && nme != NULL && nme->phase != MAL_ENGINE) {
707 0 : event = prepareNonMalEvent(nme->cntxt, nme->phase, nme->clk,
708 : nme->tid, nme->ts, nme->state,
709 : nme->duration);
710 : }
711 0 : if (event) {
712 0 : logjsonInternal(event, true);
713 0 : GDKfree(event);
714 : }
715 : }
716 44 : MT_lock_unset(&mal_profileLock);
717 : }
718 :
719 : /* The first scheme dumps the events on a stream (and in the pool)
720 : */
721 : str
722 0 : openProfilerStream(Client cntxt, int m)
723 : {
724 : #ifdef HAVE_SYS_RESOURCE_H
725 0 : getrusage(RUSAGE_SELF, &infoUsage);
726 0 : prevUsage = infoUsage;
727 : #endif
728 0 : MT_lock_set(&mal_profileLock);
729 0 : if (myname == 0) {
730 0 : myname = putName("profiler");
731 0 : logjsonInternal(monet_characteristics, true);
732 : }
733 0 : if (maleventstream) {
734 : /* The DBA can always grab the stream, others have to wait */
735 0 : if (cntxt->user == MAL_ADMIN) {
736 0 : closeProfilerStream(cntxt);
737 : } else {
738 0 : MT_lock_unset(&mal_profileLock);
739 0 : throw(MAL, "profiler.start",
740 : "Profiler already running, stream not available");
741 : }
742 : }
743 : /* 4 activates profiler in minimal mode. 1 and 3 were used in prev MonetDB versions */
744 : /* 0 activates profiler in detailed mode */
745 0 : switch (m) {
746 0 : case 0:
747 0 : profilerStatus = -1;
748 0 : break;
749 0 : case 4:
750 0 : profilerStatus = -1;
751 0 : profilerMode = 1;
752 0 : break;
753 0 : default:
754 0 : MT_lock_unset(&mal_profileLock);
755 0 : throw(MAL, "profiler.openstream", "Undefined profiler mode option");
756 : }
757 0 : maleventstream = cntxt->fdout;
758 0 : profilerUser = cntxt->user;
759 :
760 0 : MT_lock_unset(&mal_profileLock);
761 0 : return MAL_SUCCEED;
762 : }
763 :
764 : str
765 1 : closeProfilerStream(Client cntxt)
766 : {
767 1 : (void) cntxt;
768 1 : maleventstream = NULL;
769 1 : profilerStatus = 0;
770 1 : profilerMode = 0;
771 1 : profilerUser = 0;
772 1 : return MAL_SUCCEED;
773 : }
774 :
775 : /* the second scheme is to collect the profile
776 : * events in a local table for direct SQL inspection
777 : */
778 : str
779 1 : startProfiler(Client cntxt)
780 : {
781 : #ifdef HAVE_SYS_RESOURCE_H
782 1 : getrusage(RUSAGE_SELF, &infoUsage);
783 1 : prevUsage = infoUsage;
784 : #endif
785 1 : (void) cntxt;
786 :
787 1 : MT_lock_set(&mal_profileLock);
788 1 : if (maleventstream) {
789 0 : MT_lock_unset(&mal_profileLock);
790 0 : throw(MAL, "profiler.start",
791 : "Profiler already running, stream not available");
792 : }
793 1 : if (myname == 0) {
794 1 : myname = putName("profiler");
795 : }
796 1 : profilerStatus = 1;
797 1 : logjsonInternal(monet_characteristics, true);
798 1 : MT_lock_unset(&mal_profileLock);
799 : // reset the trace table
800 1 : clearTrace(cntxt);
801 :
802 1 : return MAL_SUCCEED;
803 : }
804 :
805 : /* SQL tracing is simplified, because it only collects the events in the temporary table.
806 : */
807 : str
808 22 : startTrace(Client cntxt)
809 : {
810 22 : cntxt->sqlprofiler = TRUE;
811 22 : clearTrace(cntxt);
812 22 : return MAL_SUCCEED;
813 : }
814 :
815 : str
816 26 : stopTrace(Client cntxt)
817 : {
818 26 : cntxt->sqlprofiler = FALSE;
819 26 : return MAL_SUCCEED;
820 : }
821 :
822 : str
823 335 : stopProfiler(Client cntxt)
824 : {
825 335 : MT_lock_set(&mal_profileLock);
826 335 : if (profilerStatus)
827 1 : profilerStatus = 0;
828 335 : setHeartbeat(0); // stop heartbeat
829 335 : if (cntxt)
830 1 : closeProfilerStream(cntxt);
831 335 : MT_lock_unset(&mal_profileLock);
832 335 : return MAL_SUCCEED;
833 : }
834 :
835 : /*
836 : * SQL profile traces
837 : * The events being captured are stored in client specific BATs.
838 : * They are made persistent to accumate information over
839 : * multiple sessions. This means it has to be explicitly reset
840 : * to avoid disc overflow using profiler.reset().
841 : *
842 : * The information returned for the trace is purposely limited
843 : * to the ticks and the final MAL instruction.
844 : * For more detailed analysis, the stethoscope should be used.
845 : */
846 :
847 : static void
848 14 : _cleanupProfiler(Client cntxt)
849 : {
850 14 : BBPreclaim(cntxt->profticks);
851 14 : BBPreclaim(cntxt->profstmt);
852 14 : BBPreclaim(cntxt->profevents);
853 14 : cntxt->profticks = cntxt->profstmt = cntxt->profevents = NULL;
854 14 : }
855 :
856 : static inline BAT *
857 108 : TRACEcreate(int tt)
858 : {
859 108 : return COLnew(0, tt, 1 << 10, TRANSIENT);
860 : }
861 :
862 : static void
863 80 : initTrace(Client cntxt)
864 : {
865 80 : MT_lock_set(&mal_profileLock);
866 80 : if (cntxt->profticks) {
867 44 : MT_lock_unset(&mal_profileLock);
868 44 : return; /* already initialized */
869 : }
870 36 : cntxt->profticks = TRACEcreate(TYPE_lng);
871 36 : cntxt->profstmt = TRACEcreate(TYPE_str);
872 36 : cntxt->profevents = TRACEcreate(TYPE_str);
873 36 : if (cntxt->profticks == NULL || cntxt->profstmt == NULL
874 36 : || cntxt->profevents == NULL)
875 0 : _cleanupProfiler(cntxt);
876 36 : MT_lock_unset(&mal_profileLock);
877 : }
878 :
879 : int
880 53 : TRACEtable(Client cntxt, BAT **r)
881 : {
882 53 : initTrace(cntxt);
883 53 : MT_lock_set(&mal_profileLock);
884 53 : if (cntxt->profticks == NULL) {
885 0 : MT_lock_unset(&mal_profileLock);
886 0 : return -1; /* not initialized */
887 : }
888 53 : r[0] = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false, TRANSIENT);
889 53 : r[1] = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false, TRANSIENT);
890 53 : r[2] = COLcopy(cntxt->profevents, cntxt->profevents->ttype, false,
891 : TRANSIENT);
892 53 : MT_lock_unset(&mal_profileLock);
893 53 : if (r[0] == NULL || r[1] == NULL || r[2] == NULL) {
894 0 : BBPreclaim(r[0]);
895 0 : BBPreclaim(r[1]);
896 0 : BBPreclaim(r[2]);
897 0 : return -1;
898 : }
899 : return 3;
900 : }
901 :
902 : BAT *
903 8 : getTrace(Client cntxt, const char *nme)
904 : {
905 8 : BAT *bn = NULL;
906 :
907 8 : MT_lock_set(&mal_profileLock);
908 8 : if (cntxt->profticks) {
909 8 : if (strcmp(nme, "usec") == 0) {
910 4 : bn = COLcopy(cntxt->profticks, cntxt->profticks->ttype, false,
911 : TRANSIENT);
912 4 : } else if (strcmp(nme, "stmt") == 0) {
913 4 : bn = COLcopy(cntxt->profstmt, cntxt->profstmt->ttype, false,
914 : TRANSIENT);
915 0 : } else if (strcmp(nme, "events") == 0) {
916 0 : bn = COLcopy(cntxt->profevents, cntxt->profevents->ttype, false,
917 : TRANSIENT);
918 : }
919 : }
920 8 : MT_lock_unset(&mal_profileLock);
921 8 : return bn;
922 : }
923 :
924 : void
925 27 : clearTrace(Client cntxt)
926 : {
927 27 : (void) cntxt;
928 27 : MT_lock_set(&mal_profileLock);
929 27 : if (cntxt->profticks == NULL) {
930 13 : MT_lock_unset(&mal_profileLock);
931 13 : initTrace(cntxt);
932 13 : return; /* not initialized */
933 : }
934 : /* drop all trace tables */
935 14 : _cleanupProfiler(cntxt);
936 14 : MT_lock_unset(&mal_profileLock);
937 14 : initTrace(cntxt);
938 : }
939 :
940 : str
941 0 : cleanupTraces(Client cntxt)
942 : {
943 0 : clearTrace(cntxt);
944 0 : return MAL_SUCCEED;
945 : }
946 :
947 : void
948 858 : sqlProfilerEvent(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci,
949 : lng clk, lng ticks)
950 : {
951 858 : str stmt, c, ev;
952 858 : int errors = 0;
953 :
954 858 : if (cntxt->profticks == NULL)
955 : return;
956 :
957 : /* generate actual call statement */
958 858 : stmt = instruction2str(mb, stk, pci, LIST_MAL_ALL | LIST_MAL_ALGO);
959 858 : c = stmt;
960 :
961 : /* unclear why we needed this. OLD?
962 : while (c && *c && (isspace((unsigned char)*c) || *c == '!'))
963 : c++;
964 : */
965 :
966 858 : ev = prepareMalEvent(cntxt, mb, stk, pci, clk, ticks);
967 : // keep it a short transaction
968 858 : MT_lock_set(&mal_profileLock);
969 858 : if (cntxt->profticks == NULL) {
970 0 : MT_lock_unset(&mal_profileLock);
971 0 : GDKfree(stmt);
972 0 : return;
973 : }
974 858 : errors += BUNappend(cntxt->profticks, &ticks, false) != GDK_SUCCEED;
975 858 : errors += BUNappend(cntxt->profstmt, c, false) != GDK_SUCCEED;
976 858 : errors += BUNappend(cntxt->profevents, ev ? ev : str_nil,
977 858 : false) != GDK_SUCCEED;
978 858 : if (errors > 0) {
979 : /* stop profiling if an error occurred */
980 0 : cntxt->sqlprofiler = FALSE;
981 : }
982 :
983 858 : MT_lock_unset(&mal_profileLock);
984 858 : GDKfree(stmt);
985 858 : GDKfree(ev);
986 : }
987 :
988 : lng
989 0 : getDiskWrites(void)
990 : {
991 : #ifdef HAVE_SYS_RESOURCE_H
992 0 : struct rusage infoUsage;
993 0 : getrusage(RUSAGE_SELF, &infoUsage);
994 0 : return infoUsage.ru_oublock;
995 : #else
996 : return 0;
997 : #endif
998 : }
999 :
1000 : lng
1001 0 : getDiskReads(void)
1002 : {
1003 : #ifdef HAVE_SYS_RESOURCE_H
1004 0 : struct rusage infoUsage;
1005 0 : getrusage(RUSAGE_SELF, &infoUsage);
1006 0 : return infoUsage.ru_inblock;
1007 : #else
1008 : return 0;
1009 : #endif
1010 : }
1011 :
1012 : lng
1013 0 : getUserTime(void)
1014 : {
1015 : #ifdef HAVE_TIMES
1016 0 : struct tms newTms;
1017 0 : times(&newTms);
1018 0 : return newTms.tms_utime;
1019 : #else
1020 : return 0;
1021 : #endif
1022 : }
1023 :
1024 : lng
1025 0 : getSystemTime(void)
1026 : {
1027 : #ifdef HAVE_TIMES
1028 0 : struct tms newTms;
1029 0 : times(&newTms);
1030 0 : return newTms.tms_stime;
1031 : #else
1032 : return 0;
1033 : #endif
1034 : }
1035 :
1036 : /* Calculate a pessimistic size of the disk storage */
1037 : lng
1038 0 : getDiskSpace(void)
1039 : {
1040 0 : BAT *b;
1041 0 : bat i;
1042 0 : lng size = 0;
1043 :
1044 0 : for (i = 1; i < getBBPsize(); i++)
1045 0 : if (BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))) {
1046 0 : b = BATdescriptor(i);
1047 0 : if (b) {
1048 0 : size += sizeof(BAT);
1049 :
1050 0 : MT_lock_set(&b->theaplock);
1051 0 : if (!isVIEW(b)) {
1052 0 : BUN cnt = BATcount(b);
1053 :
1054 : /* the upperbound is used for the heaps */
1055 0 : if (b->tvheap)
1056 0 : size += HEAPvmsize(b->tvheap);
1057 0 : MT_lock_unset(&b->theaplock);
1058 :
1059 0 : size += tailsize(b, cnt);
1060 0 : if (b->thash)
1061 0 : size += sizeof(BUN) * cnt;
1062 : /* also add the size of an imprint, ordered index or mosaic */
1063 0 : if (b->timprints)
1064 0 : size += IMPSimprintsize(b);
1065 0 : if (b->torderidx)
1066 0 : size += HEAPvmsize(b->torderidx);
1067 : } else {
1068 0 : MT_lock_unset(&b->theaplock);
1069 : }
1070 0 : BBPunfix(i);
1071 : }
1072 : }
1073 0 : return size;
1074 : }
1075 :
1076 :
1077 : void
1078 10 : profilerGetCPUStat(lng *user, lng *nice, lng *sys, lng *idle, lng *iowait)
1079 : {
1080 10 : (void) getCPULoad(NULL);
1081 10 : *user = corestat[LASTCPU].user;
1082 10 : *nice = corestat[LASTCPU].nice;
1083 10 : *sys = corestat[LASTCPU].system;
1084 10 : *idle = corestat[LASTCPU].idle;
1085 10 : *iowait = corestat[LASTCPU].iowait;
1086 10 : }
1087 :
1088 : /* the heartbeat process produces a ping event once every X milliseconds */
1089 : static MT_Id hbthread;
1090 : static ATOMIC_TYPE hbrunning = ATOMIC_VAR_INIT(0);
1091 :
1092 : static void
1093 335 : profilerHeartbeat(void *dummy)
1094 : {
1095 335 : int t;
1096 347 : const int timeout = ATOMIC_GET(&GDKdebug) & FORCEMITOMASK ? 10 : 25;
1097 :
1098 335 : (void) dummy;
1099 335 : for (;;) {
1100 : /* wait until you need this info */
1101 335 : MT_thread_setworking("sleeping");
1102 153725 : while (ATOMIC_GET(&hbdelay) == 0 || maleventstream == NULL) {
1103 153725 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1104 334 : return;
1105 153391 : MT_sleep_ms(timeout);
1106 : }
1107 0 : for (t = (int) ATOMIC_GET(&hbdelay); t > 0; t -= timeout) {
1108 0 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1109 0 : return;
1110 0 : MT_sleep_ms(t > timeout ? timeout : t);
1111 : }
1112 0 : if (GDKexiting() || !ATOMIC_GET(&hbrunning))
1113 0 : return;
1114 0 : MT_thread_setworking("pinging");
1115 0 : profilerHeartbeatEvent("ping");
1116 : }
1117 : }
1118 :
1119 : void
1120 669 : setHeartbeat(int delay)
1121 : {
1122 669 : if (delay < 0) {
1123 334 : ATOMIC_SET(&hbrunning, 0);
1124 334 : if (hbthread)
1125 334 : MT_join_thread(hbthread);
1126 334 : return;
1127 : }
1128 335 : if (delay > 0 && delay <= 10)
1129 0 : delay = 10;
1130 335 : ATOMIC_SET(&hbdelay, delay);
1131 : }
1132 :
1133 : /* TODO getprofilerlimit and setprofilerlimit functions */
1134 :
1135 : int
1136 2 : getprofilerlimit(void)
1137 : {
1138 2 : return 0;
1139 : }
1140 :
1141 : void
1142 0 : setprofilerlimit(int limit)
1143 : {
1144 0 : (void) limit;
1145 0 : }
1146 :
1147 : void
1148 335 : initProfiler(void)
1149 : {
1150 335 : gettimeofday(&startup_time, NULL);
1151 335 : }
1152 :
1153 : void
1154 335 : initHeartbeat(void)
1155 : {
1156 335 : ATOMIC_SET(&hbrunning, 1);
1157 335 : if (MT_create_thread(&hbthread, profilerHeartbeat, NULL, MT_THR_JOINABLE,
1158 : "heartbeat") < 0) {
1159 : /* it didn't happen */
1160 0 : hbthread = 0;
1161 0 : ATOMIC_SET(&hbrunning, 0);
1162 : }
1163 335 : }
|