1    	// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2    	// vim: ts=8 sw=2 smarttab
3    	/*
4    	 * Ceph - scalable distributed file system
5    	 *
6    	 * Copyright (C) 2011 New Dream Network
7    	 * Copyright (C) 2018 Red Hat, Inc.
8    	 *
9    	 * This is free software; you can redistribute it and/or
10   	 * modify it under the terms of the GNU Lesser General Public
11   	 * License version 2.1, as published by the Free Software
12   	 * Foundation.  See file COPYING.
13   	 *
14   	 */
15   	
16   	#include <errno.h>
17   	#include <fcntl.h>
18   	#include <stdint.h>
19   	#include <string.h>
20   	#include <sys/mount.h>
21   	#include <sys/param.h>
22   	#include <sys/socket.h>
23   	#include <sys/stat.h>
24   	#include <sys/types.h>
25   	#include <unistd.h>
26   	#if defined(__linux__) 
27   	#include <sys/vfs.h>
28   	#endif
29   	
30   	#include "include/compat.h"
31   	#include "include/sock_compat.h"
32   	#include "common/safe_io.h"
33   	
// Value of statfs::f_type that fstatfs() reports for a ZFS filesystem;
// on_zfs() compares against it.
// NOTE(review): 0xde looks like the FreeBSD value; other platforms may report
// a different magic number for ZFS -- confirm before relying on it elsewhere.
#define FS_ZFS_TYPE 0xde
36   	
37   	// On FreeBSD, ZFS fallocate always fails since it is considered impossible to
38   	// reserve space on a COW filesystem. posix_fallocate() returns EINVAL
39   	// Linux in this case already emulates the reservation in glibc
40   	// In which case it is allocated manually, and still that is not a real guarantee
41   	// that a full buffer is allocated on disk, since it could be compressed.
42   	// To prevent this the written buffer needs to be loaded with random data.
43   	int manual_fallocate(int fd, off_t offset, off_t len) {
44   	  int r = lseek(fd, offset, SEEK_SET);
45   	  if (r == -1)
46   	    return errno;
47   	  char data[1024*128];
48   	  // TODO: compressing filesystems would require random data
49   	  memset(data, 0x42, sizeof(data));
50   	  for (off_t off = 0; off < len; off += sizeof(data)) {
51   	    if (off + static_cast<off_t>(sizeof(data)) > len)
52   	      r = safe_write(fd, data, len - off);
53   	    else
54   	      r = safe_write(fd, data, sizeof(data));
55   	    if (r == -1) {
56   	      return errno;
57   	    }
58   	  }
59   	  return 0;
60   	}
61   	
62   	int on_zfs(int basedir_fd) {
63   	  struct statfs basefs;
64   	  (void)fstatfs(basedir_fd, &basefs);
65   	  return (basefs.f_type == FS_ZFS_TYPE);
66   	}
67   	
68   	int ceph_posix_fallocate(int fd, off_t offset, off_t len) {
69   	  // Return 0 if oke, otherwise errno > 0
70   	
71   	#ifdef HAVE_POSIX_FALLOCATE
72   	  if (on_zfs(fd)) {
73   	    return manual_fallocate(fd, offset, len);
74   	  } else {
75   	    return posix_fallocate(fd, offset, len);
76   	  }
77   	#elif defined(__APPLE__)
78   	  fstore_t store;
79   	  store.fst_flags = F_ALLOCATECONTIG;
80   	  store.fst_posmode = F_PEOFPOSMODE;
81   	  store.fst_offset = offset;
82   	  store.fst_length = len;
83   	
84   	  int ret = fcntl(fd, F_PREALLOCATE, &store);
85   	  if (ret == -1) {
86   	    ret = errno;
87   	  }
88   	  return ret;
89   	#else
90   	  return manual_fallocate(fd, offset, len);
91   	#endif
92   	} 
93   	
94   	int pipe_cloexec(int pipefd[2], int flags)
95   	{
96   	#if defined(HAVE_PIPE2)
97   	  return pipe2(pipefd, O_CLOEXEC | flags);
98   	#else
99   	  if (pipe(pipefd) == -1)
100  	    return -1;
101  	
102  	  /*
103  	   * The old-fashioned, race-condition prone way that we have to fall
104  	   * back on if pipe2 does not exist.
105  	   */
106  	  if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) < 0) {
107  	    goto fail;
108  	  }
109  	
110  	  if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) < 0) {
111  	    goto fail;
112  	  }
113  	
114  	  return 0;
115  	fail:
116  	  int save_errno = errno;
117  	  VOID_TEMP_FAILURE_RETRY(close(pipefd[0]));
118  	  VOID_TEMP_FAILURE_RETRY(close(pipefd[1]));
119  	  return (errno = save_errno, -1);
120  	#endif
121  	}
122  	
123  	
// Open a socket that has close-on-exec set.
//
// Takes the same arguments as socket(). Returns the new descriptor, or -1
// with errno set on failure.
int socket_cloexec(int domain, int type, int protocol)
{
#ifdef SOCK_CLOEXEC
  // The flag can be requested at creation time.
  return socket(domain, type | SOCK_CLOEXEC, protocol);
#else
  const int sock = socket(domain, type, protocol);
  if (sock == -1)
    return -1;

  // No SOCK_CLOEXEC available: set the flag after the fact.
  if (fcntl(sock, F_SETFD, FD_CLOEXEC) >= 0)
    return sock;

  // Setting the flag failed: drop the socket but report the fcntl() error.
  const int saved = errno;
  VOID_TEMP_FAILURE_RETRY(close(sock));
  errno = saved;
  return -1;
#endif
}
143  	
// Create a connected socket pair with close-on-exec set on both ends.
//
// Takes the same arguments as socketpair(). Returns 0 on success, or -1 with
// errno set on failure.
int socketpair_cloexec(int domain, int type, int protocol, int sv[2])
{
#ifdef SOCK_CLOEXEC
  // The flag can be requested at creation time.
  return socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
#else
  if (socketpair(domain, type, protocol, sv) == -1)
    return -1;

  // No SOCK_CLOEXEC available: flag each end in turn, stopping at the first
  // failure.
  if (fcntl(sv[0], F_SETFD, FD_CLOEXEC) >= 0 &&
      fcntl(sv[1], F_SETFD, FD_CLOEXEC) >= 0)
    return 0;

  // Close both ends again, reporting the fcntl() error to the caller.
  const int saved = errno;
  VOID_TEMP_FAILURE_RETRY(close(sv[0]));
  VOID_TEMP_FAILURE_RETRY(close(sv[1]));
  errno = saved;
  return -1;
#endif
}
167  	
168  	int accept_cloexec(int sockfd, struct sockaddr* addr, socklen_t* addrlen)
169  	{
170  	#ifdef HAVE_ACCEPT4
171  	  return accept4(sockfd, addr, addrlen, SOCK_CLOEXEC);
172  	#else
173  	  int fd = accept(sockfd, addr, addrlen);
174  	  if (fd == -1)
175  	    return -1;
176  	
177  	  if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
178  	    goto fail;
179  	
180  	  return fd;
181  	fail:
182  	  int save_errno = errno;
183  	  VOID_TEMP_FAILURE_RETRY(close(fd));
184  	  return (errno = save_errno, -1);
185  	#endif
186  	}
187  	
#if defined(__FreeBSD__)
// FreeBSD stand-in for sched_setaffinity(): ignores all of its arguments,
// changes nothing, and always reports success (0).
int sched_setaffinity(pid_t pid, size_t cpusetsize,
                      cpu_set_t *mask)
{
  // The arguments are deliberately unused.
  (void)pid;
  (void)cpusetsize;
  (void)mask;
  return 0;
}
#endif
195  	
196