Edit File by line

# psn -- Linux Process Snapper by Tanel Poder [https://0x.tools]

[0] Fix | Delete

[1] Fix | Delete

[2] Fix | Delete

# This program is free software; you can redistribute it and/or modify

[3] Fix | Delete

# it under the terms of the GNU General Public License as published by

[4] Fix | Delete

# the Free Software Foundation; either version 2 of the License, or

[5] Fix | Delete

# (at your option) any later version.

[6] Fix | Delete

[7] Fix | Delete

# This program is distributed in the hope that it will be useful,

[8] Fix | Delete

# but WITHOUT ANY WARRANTY; without even the implied warranty of

[9] Fix | Delete

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

[10] Fix | Delete

# GNU General Public License for more details.

[11] Fix | Delete

[12] Fix | Delete

# You should have received a copy of the GNU General Public License along

[13] Fix | Delete

# with this program; if not, write to the Free Software Foundation, Inc.,

[14] Fix | Delete

# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

[15] Fix | Delete

[16] Fix | Delete

# SPDX-License-Identifier: GPL-2.0-or-later

[17] Fix | Delete

[18] Fix | Delete

# query/report code

[19] Fix | Delete

[20] Fix | Delete

from itertools import groupby

[21] Fix | Delete

from datetime import datetime

[22] Fix | Delete

[23] Fix | Delete

import psnproc as proc

[24] Fix | Delete

import logging

[25] Fix | Delete

[26] Fix | Delete

def flatten(li):
    """Concatenate the items of every sub-iterable in *li* into one list.

    Only one level of nesting is removed; the order of items is preserved.
    """
    flat = []
    for sublist in li:
        flat.extend(sublist)
    return flat

[28] Fix | Delete

[29] Fix | Delete

[30] Fix | Delete

### ASCII table output ###

[31] Fix | Delete

def output_table_report(report, dataset):
    """Print *dataset* to stdout as an ASCII table titled with ``report.name``.

    Args:
        report: object providing ``name`` and ``full_projection()``; the
            latter yields ``(source, cols, expr, token)`` tuples, one per
            output column, in the same order as the values in each row.
        dataset: sequence of rows (each a sequence of column values). When
            empty, only the title and 'query returned no rows' are printed.

    Column types decide alignment and formatting: strings are left-aligned,
    integers are right-aligned with ``%d`` and floats are right-aligned with
    two decimals. Every column is at least as wide as its header token and at
    most ``max_field_width`` characters wide.
    """
    max_field_width = 500
    header_fmts, field_fmts = [], []
    header_tokens = []
    total_field_width = 0

    if dataset:
        for col_idx, (source, cols, expr, token) in enumerate(report.full_projection()):
            header_tokens.append(token)

            # Work out the Python type of the column.
            if token in ('pid', 'task', 'samples'):
                col_type = int
            elif token == 'event_time':
                col_type = str
            elif token == 'avg_threads':
                col_type = float
            elif cols:
                # look the declared type up in the source's column list
                col = [c for c in source.available_columns if c[0] == cols[0]][0]
                col_type = col[1]
            else:
                col_type = str

            # Widest rendered value in this column.
            if col_type == float:
                # integer part plus room for '.dd' (matches the '%.2f' below)
                max_field_length = max(len(str(int(row[col_idx]))) for row in dataset) + 3
            else:
                max_field_length = max(len(str(row[col_idx])) for row in dataset)

            field_width = min(max_field_width, max(len(token), max_field_length))

            if col_type == str:
                # left-align strings both in header and data
                header_fmts.append('%%-%s.%ss' % (field_width, field_width))
                field_fmts.append('%%-%s.%ss' % (field_width, field_width))
            else:
                header_fmts.append('%%%s.%ss' % (field_width, field_width))
                if col_type == int:
                    field_fmts.append('%%%sd' % field_width)
                elif col_type == float:
                    field_fmts.append('%%%s.%sf' % (field_width, 2))  # arbitrary
                else:
                    # Unrecognised column type: render via str() so the number
                    # of format specifiers always matches the number of columns.
                    field_fmts.append('%%%s.%ss' % (field_width, field_width))

            total_field_width += field_width

    # 3 characters for each ' | ' separator plus one space of padding per side
    report_width = total_field_width + (3 * (len(header_fmts) - 1)) + 2
    hr = '-' * report_width
    title_pad = report_width - len(report.name) - 2
    title = '=== ' + report.name + ' ' + '=' * (title_pad - 3)
    header_fmt = ' ' + ' | '.join(header_fmts) + ' '
    field_fmt = ' ' + ' | '.join(field_fmts) + ' '

    print("")
    print(title)
    print("")
    if dataset:
        print(header_fmt % tuple(header_tokens))
        print(hr)
        for row in dataset:
            # tuple() so that non-tuple row objects can be %-formatted too
            print(field_fmt % tuple(row))
    else:
        print('query returned no rows')
    print("")
    print("")

[96] Fix | Delete

[97] Fix | Delete

[98] Fix | Delete

[99] Fix | Delete

class Report:
    """A named aggregate query over the sampled data, plus a way to print it.

    Every projection, dimension, order and filter entry is stored as a
    4-tuple ``(source, [column names], SQL expression, token)``.  ``source``
    is ``None`` for pure aggregates ('samples', 'avg_threads').

    Attributes:
        name: report title.
        projection, dimensions, order, where: lists of the 4-tuples above.
        sources: dict mapping each referenced source to the list of column
            names required from it (always starting with pid, task and
            event_time, which are used as join keys).
        output_fn: callable invoked as ``output_fn(report, rows)``.
    """

    def __init__(self, name, projection, dimensions=(), where=(), order=(), output_fn=output_table_report):
        """Resolve column tokens and filters into their tuple form.

        Args:
            name: report title.
            projection: iterable of column tokens to select.
            dimensions: iterable of column tokens to group by.
            where: iterable of filter names; only 'active' and 'idle' are
                implemented.
            order: iterable of column tokens to sort by (descending).
            output_fn: renderer called by ``output_report``.

        Empty/falsy tokens are skipped.

        Raises:
            Exception: for an unknown column token or an unsupported filter.
            IndexError: if no entry refers to a concrete source.
        """
        def reify_column_token(col_token):
            if col_token == 'samples':
                return (None, [], 'COUNT(1)', col_token)
            elif col_token == 'avg_threads':
                # %(num_sample_events)s is filled in by the final substitution in query()
                return (None, [], 'CAST(COUNT(1) AS REAL) / %(num_sample_events)s', col_token)
            elif col_token in ('pid', 'task', 'event_time'):
                # present in every source; bound to a concrete one below
                return ('first_source', [col_token], col_token, col_token)

            for t in proc.all_sources:
                for c in t.schema_columns:
                    if col_token.lower() == c[0].lower():
                        return (t, [c[0]], c[0], c[0])

            raise Exception('projection/dimension column %s not found.\nUse psn --list to see all available columns' % col_token)

        def process_filter_sql(filter_sql):
            idle_filter = "stat.state_id IN ('S', 'Z', 'I')"

            if filter_sql == 'active':
                return (proc.stat, ['state_id'], 'not(%s)' % idle_filter, filter_sql)
            elif filter_sql == 'idle':
                return (proc.stat, ['state_id'], idle_filter, filter_sql)
            else:
                raise Exception('arbitrary filtering not implemented')

        def bind_first_source(entries, first_source):
            # swap the 'first_source' placeholder for the actual source object
            return [(first_source if c[0] == 'first_source' else c[0], c[1], c[2], c[3]) for c in entries]

        self.name = name
        self.projection = [reify_column_token(t) for t in projection if t]
        self.dimensions = [reify_column_token(t) for t in dimensions if t]
        self.order = [reify_column_token(t) for t in order if t]
        self.where = [process_filter_sql(t) for t in where if t]
        self.output_fn = output_fn

        # columns without a specific source are assigned the first source
        first_source = [c[0] for c in (self.projection + self.dimensions + self.order + self.where) if c[0] and c[0] != 'first_source'][0]
        self.projection = bind_first_source(self.projection, first_source)
        self.dimensions = bind_first_source(self.dimensions, first_source)
        self.order = bind_first_source(self.order, first_source)
        self.where = bind_first_source(self.where, first_source)

        self.sources = {}  # source -> [cols]
        for d in [self.projection, self.dimensions, self.order, self.where]:
            for source, column_names, expr, token in d:
                self.sources.setdefault(source, ['pid', 'task', 'event_time']).extend(column_names)
        # pure aggregates carry no source
        self.sources.pop(None, None)

    def full_projection(self):
        """Return the projection followed by any dimensions not already in it."""
        return self.projection + [c for c in self.dimensions if c not in self.projection]

    def query(self):
        """Build and return the SQL text for this report.

        All sources are listed in FROM and joined to the first one on pid,
        task and event_time.  WHERE, GROUP BY and ORDER BY are emitted only
        when they have content; ordering is always descending.
        """
        def render_col(c):
            return '%s.%s' % (c[0].name, c[2]) if c[0] else c[2]

        # build join conditions
        source_list = list(self.sources.keys())
        first_source_name = source_list[0].name
        join_where = flatten([['%s.%s = %s.%s' % (s.name, c, first_source_name, c) for c in ['pid', 'task', 'event_time']] for s in source_list[1:]])

        attr = {
            'projection': '\t' + ',\n\t'.join([render_col(c) for c in self.full_projection()]),
            'tables': '\t' + ',\n\t'.join([s.name for s in self.sources]),
            'where': '\t' + ' AND\n\t'.join([c[2] for c in self.where] + join_where),
            'dimensions': '\t' + ',\n\t'.join([render_col(c) for c in self.dimensions]),
            'order': '\t' + ',\n\t'.join([render_col(c) + ' DESC' for c in self.order]),
            'num_sample_events': '(SELECT COUNT(DISTINCT(event_time)) FROM %s)' % first_source_name
        }

        logging.debug('attr where=%s#end', attr['where'])

        sql = 'SELECT\n%(projection)s\nFROM\n%(tables)s' % attr
        # TODO think through the logic of using self.where vs attr.where (in the context of allowing pid/tid to be not part of group by)
        # Each clause text starts with a tab even when it has no entries, so
        # test the stripped text; otherwise an empty clause would be emitted.
        if attr['where'].strip():
            sql += '\nWHERE\n%(where)s' % attr
        if attr['dimensions'].strip():
            sql += '\nGROUP BY\n%(dimensions)s' % attr
        if attr['order'].strip():
            sql += '\nORDER BY\n%(order)s' % attr

        # final substitution allows things like avg_threads to work
        return sql % attr

    def dataset(self, conn):
        """Run the report query on *conn* and return all rows.

        *conn* must provide ``execute(sql)`` returning an object with
        ``fetchall()`` (presumably a sqlite3 connection -- verify against caller).
        """
        sql = self.query()
        logging.debug(sql)
        r = conn.execute(sql).fetchall()
        logging.debug('Done')
        return r

    def output_report(self, conn):
        """Fetch the report's rows from *conn* and pass them to ``output_fn``."""
        self.output_fn(self, self.dataset(conn))

[195] Fix | Delete

[196] Fix | Delete

[197] Fix | Delete

[198] Fix | Delete

[199] Fix | Delete

[200] Fix | Delete

[201] Fix | Delete