1    	/*
2    	 * Copyright (c) 1995-2002 Silicon Graphics, Inc.  All Rights Reserved.
3    	 * 
4    	 * This program is free software; you can redistribute it and/or modify it
5    	 * under the terms of the GNU General Public License as published by the
6    	 * Free Software Foundation; either version 2 of the License, or (at your
7    	 * option) any later version.
8    	 * 
9    	 * This program is distributed in the hope that it will be useful, but
10   	 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11   	 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12   	 * for more details.
13   	 */
14   	
15   	#include "pmapi.h"
16   	#include "impl.h"
17   	#include "pmcd.h"
18   	
19   	extern int _pmSelectReadable(int, fd_set *);
20   	
21   	/* Routine to split a result into a list of results, each containing metrics
22   	 * from a single domain.  The end of the list is marked by a pmResult with a
23   	 * numpmid of zero.  Any pmids for which there is no agent will be in the
24   	 * second to last pmResult which will have a negated numpmid value.
25   	 */
26   	
27   	pmResult **
28   	SplitResult(pmResult *res)
29   	{
30   	    int		i, j;
31   	    static int	*aFreq = NULL;	/* Freq. histogram: pmids for each agent */
32   	    static int	*resIndex = NULL;	/* resIndex[k] = index of agent[k]'s list in result */
33   	    static int	nDoms = 0;		/* No. of entries in two tables above */
34   	    int		nGood;
35   	    int		need;
36   	    pmResult	**results;
37   	
38   	    /* Allocate the frequency histogram and array for mapping from agent to
39   	     * result list index.  Because a SIGHUP reconfiguration may have caused a
40   	     * change in the number of agents, reallocation using a new size may be
41   	     * necessary.
42   	     * There are nAgents + 1 entries in the aFreq and resIndex arrays.  The
43   	     * last entry in each is used for the pmIDs for which no agent could be
44   	     * found.
45   	     */
46   	    if (nAgents > nDoms) {
47   		nDoms = nAgents;
48   		if (aFreq != NULL)
49   		    free(aFreq);
50   		if (resIndex != NULL)
51   		    free(resIndex);
52   		aFreq = (int *)malloc((nAgents + 1) * sizeof(int));
53   		resIndex = (int *)malloc((nAgents + 1) * sizeof(int));
54   		if (aFreq == NULL || resIndex == NULL) {
55   		    __pmNoMem("SplitResult.freq", 2 * (nAgents + 1) * sizeof(int), PM_FATAL_ERR);
56   		}
57   	    }
58   	
59   	    /* Build a frequency histogram of metric domains (use aFreq[nAgents] for
60   	     * pmids for which there is no agent).
61   	     */
62   	    for (i = 0; i <= nAgents; i++)
63   		aFreq[i] = 0;
64   	    for (i = 0; i < res->numpmid; i++) {
65   		int dom = ((__pmID_int *)&res->vset[i]->pmid)->domain;
66   		for (j = 0; j < nAgents; j++)
67   		    if (agent[j].pmDomainId == dom && agent[j].status.connected)
68   			break;
69   		aFreq[j]++;
70   	    }
71   	
72   	    /* Initialise resIndex and allocate the results structures */
73   	    nGood = 0;
74   	    for (i = 0; i < nAgents; i++)
75   		if (aFreq[i]) {
76   		    resIndex[i] = nGood;
77   		    nGood++;
78   		}
79   	    resIndex[nAgents] = nGood;
80   	
81   	    need = nGood + 1 + ((aFreq[nAgents]) ? 1 : 0);
82   	    need *= sizeof(pmResult *);
83   	    if ((results = (pmResult **) malloc(need)) == NULL) {
84   		__pmNoMem("SplitResult.results", need, PM_FATAL_ERR);
85   	    }
86   	    j = 0;
87   	    for (i = 0; i <= nAgents; i++)
88   		if (aFreq[i]) {
89   		    need = (int)sizeof(pmResult) + (aFreq[i] - 1) * (int)sizeof(pmValueSet *);
90   		    results[j] = (pmResult *) malloc(need);
91   		    if (results[j] == NULL) {
92   			__pmNoMem("SplitResult.domain", need, PM_FATAL_ERR);
93   		    }
94   		    results[j]->numpmid = aFreq[i];
95   		    j++;
96   		}
97   	
98   	    /* Make the "end of list" pmResult */
99   	    if ((results[j] = (pmResult *) malloc(sizeof(pmResult))) == NULL) {
100  		__pmNoMem("SplitResult.domain", sizeof(pmResult), PM_FATAL_ERR);
101  	    }
102  	    results[j]->numpmid = 0;
103  	
104  	    /* Foreach vset in res, find it's pmResult in the per domain results array
105  	     * and copy a pointer to the vset to the next available position in the per
106  	     * domain result.
107  	     */
108  	    for (i = 0; i <= nAgents; i++)
109  		aFreq[i] = 0;
110  	    for (i = 0; i < res->numpmid; i++) {
111  		int dom = ((__pmID_int *)&res->vset[i]->pmid)->domain;
112  		for (j = 0; j < nAgents; j++)
113  		    if (dom == agent[j].pmDomainId && agent[j].status.connected)
114  			break;
115  		results[resIndex[j]]->vset[aFreq[j]] = res->vset[i];
116  		aFreq[j]++;
117  	    }
118  	
119  	    /* Flip the sign of numpmids in the "bad list" */
120  	    if (aFreq[nAgents]) {
121  		int bad = resIndex[nAgents];
122  		results[bad]->numpmid = -results[bad]->numpmid;
123  	    }
124  	
125  	    return results;
126  	}
127  	
128  	int
129  	DoStore(ClientInfo *cp, __pmPDU* pb)
130  	{
131  	    int		sts;
132  	    int		s;
133  	    AgentInfo	*ap;
134  	    pmResult	*result;
135  	    pmResult	**dResult;
136  	    int		i;
137  	    fd_set	readyFds;
138  	    fd_set	waitFds;
139  	    int		nWait = 0;
140  	    int		maxFd = -1;
141  	    int		badStore;		/* != 0 => store to nonexistent agent */
142  	    int		notReady = 0;		/* != 0 => store to agent that's not ready */
143  	    struct timeval	timeout;
144  	
145  	
146  	    if ((sts = __pmDecodeResult(pb, &result)) < 0)
147  		return sts;
148  	
149  	    dResult = SplitResult(result);
150  	
151  	    /* Send the per-domain results to their respective agents */
152  	
153  	    FD_ZERO(&waitFds);
154  	    for (i = 0; dResult[i]->numpmid > 0; i++) {
155  		int fd;
Event returned_null: Function "FindDomainAgent" returns null (checked 7 out of 8 times). [details]
Event var_assigned: Assigning: "ap" = null return value from "FindDomainAgent".
Also see events: [example_checked][example_checked][example_checked][example_checked][example_checked][dereference]
156  		ap = FindDomainAgent(((__pmID_int *)&dResult[i]->vset[0]->pmid)->domain);
157  		/* If it's in a "good" list, pmID has agent that is connected */
158  	
Event dereference: Dereferencing a null pointer "ap".
Also see events: [returned_null][example_checked][example_checked][example_checked][example_checked][example_checked][var_assigned]
159  		if (ap->ipcType == AGENT_DSO) {
160  		    if (ap->ipc.dso.dispatch.comm.pmda_interface >= PMDA_INTERFACE_5)
161  			ap->ipc.dso.dispatch.version.four.ext->e_context = cp - client;
162  		    if (ap->ipc.dso.dispatch.comm.pmda_interface >= PMDA_INTERFACE_4)
163  			s = ap->ipc.dso.dispatch.version.four.store(dResult[i],
164  					       ap->ipc.dso.dispatch.version.four.ext);
165  		    else if (ap->ipc.dso.dispatch.comm.pmda_interface == PMDA_INTERFACE_2 ||
166  		        ap->ipc.dso.dispatch.comm.pmda_interface == PMDA_INTERFACE_3)
167  			s = ap->ipc.dso.dispatch.version.two.store(dResult[i],
168  					       ap->ipc.dso.dispatch.version.two.ext);
169  		    else
170  			s = ap->ipc.dso.dispatch.version.one.store(dResult[i]);
171  		    if (s < 0 &&
172  			ap->ipc.dso.dispatch.comm.pmapi_version == PMAPI_VERSION_1)
173  			    s = XLATE_ERR_1TO2(s);
174  		}
175  		else {
176  		    if (ap->status.notReady == 0) {
177  			/* agent is ready for PDUs */
178  			if (_pmcd_trace_mask)
179  			    pmcd_trace(TR_XMIT_PDU, ap->inFd, PDU_RESULT, dResult[i]->numpmid);
180  			s = __pmSendResult(ap->inFd, cp - client, dResult[i]);
181  			if (s >= 0) {
182  			    ap->status.busy = 1;
183  			    fd = ap->outFd;
184  			    FD_SET(fd, &waitFds);
185  			    if (fd > maxFd)
186  				maxFd = fd;
187  			    nWait++;
188  			}
189  			else if (s == PM_ERR_IPC || sts == PM_ERR_TIMEOUT || s == -EPIPE) {
190  			    pmcd_trace(TR_XMIT_ERR, ap->inFd, PDU_RESULT, sts);
191  			    CleanupAgent(ap, AT_COMM, ap->inFd);
192  			}
193  		    }
194  		    else
195  			/* agent is not ready for PDUs */
196  			notReady = 1;
197  		}
198  		if (s < 0) {
199  		    sts = s;
200  		    continue;
201  		}
202  	    }
203  	
204  	    /* If there was no agent for one or more pmIDs, there will be a "bad list"
205  	     * with a negated numpmid value.  Store as many "good" pmIDs as possible
206  	     * but remember that there were homeless ones.
207  	     */
208  	    badStore = dResult[i]->numpmid < 0;
209  	
210  	    /* Collect error PDUs containing store status from each active agent */
211  	
212  	    while (nWait > 0) {
213  		memcpy(&readyFds, &waitFds, sizeof(readyFds));
214  		if (nWait > 1) {
215  		    timeout.tv_sec = _pmcd_timeout;
216  		    timeout.tv_usec = 0;
217  	
218  		    s = select(maxFd+1, &readyFds, NULL, NULL, &timeout);
219  	
220  		    if (s == 0) {
221  			__pmNotifyErr(LOG_INFO, "DoStore: select timeout");
222  	
223  			/* Timeout, terminate agents that haven't responded */
224  			for (i = 0; i < nAgents; i++) {
225  			    if (agent[i].status.busy) {
226  				pmcd_trace(TR_RECV_TIMEOUT, agent[i].outFd, PDU_ERROR, 0);
227  				CleanupAgent(&agent[i], AT_COMM, agent[i].inFd);
228  			    }
229  			}
230  			sts = PM_ERR_IPC;
231  			break;
232  		    }
233  		    else if (sts < 0) {
234  			/* this is not expected to happen! */
235  			__pmNotifyErr(LOG_ERR, "DoStore: fatal select failure: %s\n",
236  				netstrerror());
237  			Shutdown();
238  			exit(1);
239  		    }
240  		}
241  	
242  		for (i = 0; i < nAgents; i++) {
243  		    ap = &agent[i];
244  		    if (!ap->status.busy || !FD_ISSET(ap->outFd, &readyFds))
245  			continue;
246  		    ap->status.busy = 0;
247  		    FD_CLR(ap->outFd, &waitFds);
248  		    nWait--;
249  		    s = __pmGetPDU(ap->outFd, ANY_SIZE, _pmcd_timeout, &pb);
250  		    if (s > 0 && _pmcd_trace_mask)
251  			pmcd_trace(TR_RECV_PDU, ap->outFd, s, (int)((__psint_t)pb & 0xffffffff));
252  		    if (s == PDU_ERROR) {
253  			int ss;
254  			if ((ss = __pmDecodeError(pb, &s)) < 0)
255  			    sts = ss;
256  			else {
257  			    if (s < 0) {
258  				extern int CheckError(AgentInfo *, int);
259  	
260  				sts = CheckError(ap, s);
261  				pmcd_trace(TR_RECV_ERR, ap->outFd, PDU_RESULT, sts);
262  			    }
263  			}
264  		    }
265  		    else {
266  			/* Agent protocol error */
267  			if (s < 0)
268  			    pmcd_trace(TR_RECV_ERR, ap->outFd, PDU_RESULT, s);
269  			else
270  			    pmcd_trace(TR_WRONG_PDU, ap->outFd, PDU_ERROR, s);
271  			sts = PM_ERR_IPC;
272  		    }
273  	
274  		    if (ap->ipcType != AGENT_DSO &&
275  			(sts == PM_ERR_IPC || sts == PM_ERR_TIMEOUT))
276  			CleanupAgent(ap, AT_COMM, ap->outFd);
277  		}
278  	    }
279  	
280  	    /* Only one error code can be returned, so "no agent" or "not
281  	     * ready" errors have precedence over all except IPC and TIMEOUT
282  	     * protocol failures.
283  	     * Note that we make only a weak effort to return the most
284  	     * appropriate error status because clients interested in the
285  	     * outcome should be using pmStore on individual metric/instances
286  	     * if the outcome is important.  In particular, in multi-agent
287  	     * stores, an earlier PM_ERR_IPC error can be "overwritten" by a
288  	     * subsequent less serious error.
289  	     */
290  	    if (sts != PM_ERR_IPC && sts != PM_ERR_TIMEOUT) {
291  		if (badStore) {
292  		    sts = PM_ERR_NOAGENT;
293  		}
294  		else if (notReady) {
295  		    sts = PM_ERR_AGAIN;
296  		}
297  	    }
298  	
299  	    if (sts >= 0) {
300  		/* send PDU_ERROR, even if result was 0 */
301  		int s;
302  		if (_pmcd_trace_mask)
303  		    pmcd_trace(TR_XMIT_PDU, cp->fd, PDU_ERROR, 0);
304  		s = __pmSendError(cp->fd, FROM_ANON, 0);
305  		if (s < 0)
306  		    CleanupClient(cp, s);
307  	    }
308  	
309  	    pmFreeResult(result);
310  	    i = 0;
311  	    do {
312  		s = dResult[i]->numpmid;
313  		free(dResult[i]);
314  		i++;
315  	    } while (s);			/* numpmid == 0 terminates list */
316  	    free(dResult);
317  	
318  	    return sts;
319  	}