File: | home/bhubbard/working/src/ceph/src/rocksdb/env/io_posix.cc |
Warning: | line 629, column 5 Null pointer passed as an argument to a 'nonnull' parameter |
[?] Use j/k keys for keyboard navigation
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. | |||
2 | // This source code is licensed under both the GPLv2 (found in the | |||
3 | // COPYING file in the root directory) and Apache 2.0 License | |||
4 | // (found in the LICENSE.Apache file in the root directory). | |||
5 | // | |||
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
7 | // Use of this source code is governed by a BSD-style license that can be | |||
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
9 | ||||
10 | #ifdef ROCKSDB_LIB_IO_POSIX1 | |||
11 | #include "env/io_posix.h" | |||
12 | #include <errno(*__errno_location ()).h> | |||
13 | #include <fcntl.h> | |||
14 | #include <algorithm> | |||
15 | #if defined(OS_LINUX1) | |||
16 | #include <linux/fs.h> | |||
17 | #endif | |||
18 | #include <stdio.h> | |||
19 | #include <stdlib.h> | |||
20 | #include <string.h> | |||
21 | #include <sys/ioctl.h> | |||
22 | #include <sys/mman.h> | |||
23 | #include <sys/stat.h> | |||
24 | #include <sys/types.h> | |||
25 | #ifdef OS_LINUX1 | |||
26 | #include <sys/statfs.h> | |||
27 | #include <sys/syscall.h> | |||
28 | #include <sys/sysmacros.h> | |||
29 | #endif | |||
30 | #include "env/posix_logger.h" | |||
31 | #include "monitoring/iostats_context_imp.h" | |||
32 | #include "port/port.h" | |||
33 | #include "rocksdb/slice.h" | |||
34 | #include "util/coding.h" | |||
35 | #include "util/string_util.h" | |||
36 | #include "util/sync_point.h" | |||
37 | ||||
38 | #if defined(OS_LINUX1) && !defined(F_SET_RW_HINT(1024 + 12)) | |||
39 | #define F_LINUX_SPECIFIC_BASE1024 1024 | |||
40 | #define F_SET_RW_HINT(1024 + 12) (F_LINUX_SPECIFIC_BASE1024 + 12) | |||
41 | #endif | |||
42 | ||||
43 | namespace rocksdb { | |||
44 | ||||
45 | // A wrapper for fadvise, if the platform doesn't support fadvise, | |||
46 | // it will simply return 0. | |||
// Fadvise: forwards (fd, offset, len, advice) to posix_fadvise() and returns
// its result when the Linux guard below is active. On every other platform
// the four arguments are explicitly discarded and 0 is returned.
47 | int Fadvise(int fd, off_t offset, size_t len, int advice) { | |||
48 | #ifdef OS_LINUX1 | |||
49 | return posix_fadvise(fd, offset, len, advice); | |||
50 | #else | |||
51 | (void)fd; | |||
52 | (void)offset; | |||
53 | (void)len; | |||
54 | (void)advice; | |||
55 | return 0; // simply do nothing. | |||
56 | #endif | |||
57 | } | |||
58 | ||||
59 | namespace { | |||
// GetLogicalBufferSize: returns the logical block size of the device that
// backs `fd`, falling back to kDefaultPageSize whenever it cannot be found.
//
// Linux path, in order:
//   1. fstat(fd); on failure, or when the device major number is 0, return
//      kDefaultPageSize.
//   2. Resolve /sys/dev/block/<major>:<minor> with realpath(); on failure
//      return kDefaultPageSize.
//   3. Depending on the last two path components, drop the last component so
//      that a partition directory is replaced by its parent device directory.
//   4. Read <dir>/queue/logical_block_size and return it only if it is a
//      non-zero power of two.
// On non-Linux builds only the final `return kDefaultPageSize` remains.
60 | size_t GetLogicalBufferSize(int __attribute__((__unused__)) fd) { | |||
61 | #ifdef OS_LINUX1 | |||
62 | struct stat buf; | |||
63 | int result = fstat(fd, &buf); | |||
64 | if (result == -1) { | |||
65 | return kDefaultPageSize; | |||
66 | } | |||
67 | if (major(buf.st_dev)gnu_dev_major (buf.st_dev) == 0) { | |||
68 | // Unnamed devices (e.g. non-device mounts), reserved as null device number. | |||
69 | // These don't have an entry in /sys/dev/block/. Return a sensible default. | |||
70 | return kDefaultPageSize; | |||
71 | } | |||
72 | ||||
73 | // Reading queue/logical_block_size does not require special permissions. | |||
74 | const int kBufferSize = 100; | |||
75 | char path[kBufferSize]; | |||
76 | char real_path[PATH_MAX4096 + 1]; | |||
77 | snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev)gnu_dev_major (buf.st_dev), | |||
78 | minor(buf.st_dev)gnu_dev_minor (buf.st_dev)); | |||
79 | if (realpath(path, real_path) == nullptr) { | |||
80 | return kDefaultPageSize; | |||
81 | } | |||
82 | std::string device_dir(real_path); | |||
// Strip one trailing '/' so the rfind() calls below see the last component.
83 | if (!device_dir.empty() && device_dir.back() == '/') { | |||
84 | device_dir.pop_back(); | |||
85 | } | |||
86 | // NOTE: sda3 and nvme0n1p1 do not have a `queue/` subdir, only the parent sda | |||
87 | // and nvme0n1 have it. | |||
88 | // $ ls -al '/sys/dev/block/8:3' | |||
89 | // lrwxrwxrwx. 1 root root 0 Jun 26 01:38 /sys/dev/block/8:3 -> | |||
90 | // ../../block/sda/sda3 | |||
91 | // $ ls -al '/sys/dev/block/259:4' | |||
92 | // lrwxrwxrwx 1 root root 0 Jan 31 16:04 /sys/dev/block/259:4 -> | |||
93 | // ../../devices/pci0000:17/0000:17:00.0/0000:18:00.0/nvme/nvme0/nvme0n1/nvme0n1p1 | |||
// parent_end / parent_begin are the positions of the last and second-to-last
// '/' in device_dir; `parent` is the component between them and `child` is
// the final component.
94 | size_t parent_end = device_dir.rfind('/', device_dir.length() - 1); | |||
95 | if (parent_end == std::string::npos) { | |||
96 | return kDefaultPageSize; | |||
97 | } | |||
98 | size_t parent_begin = device_dir.rfind('/', parent_end - 1); | |||
99 | if (parent_begin == std::string::npos) { | |||
100 | return kDefaultPageSize; | |||
101 | } | |||
102 | std::string parent = | |||
103 | device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1); | |||
104 | std::string child = device_dir.substr(parent_end + 1, std::string::npos); | |||
// Drop the last component unless the parent directory is literally "block".
// A child whose name starts with "nvme" is only dropped when it also contains
// a 'p'; compare() returns non-zero when the prefix differs, so every
// non-"nvme" child is dropped.
105 | if (parent != "block" && | |||
106 | (child.compare(0, 4, "nvme") || child.find('p') != std::string::npos)) { | |||
107 | device_dir = device_dir.substr(0, parent_end); | |||
108 | } | |||
109 | std::string fname = device_dir + "/queue/logical_block_size"; | |||
110 | FILE* fp; | |||
111 | size_t size = 0; | |||
112 | fp = fopen(fname.c_str(), "r"); | |||
113 | if (fp != nullptr) { | |||
114 | char* line = nullptr; | |||
115 | size_t len = 0; | |||
116 | if (getline(&line, &len, fp) != -1) { | |||
117 | sscanf(line, "%zu", &size); | |||
118 | } | |||
// `line` is released with free() whether or not getline() succeeded.
119 | free(line); | |||
120 | fclose(fp); | |||
121 | } | |||
// Accept the value only if it is a non-zero power of two.
122 | if (size != 0 && (size & (size - 1)) == 0) { | |||
123 | return size; | |||
124 | } | |||
125 | #endif | |||
126 | return kDefaultPageSize; | |||
127 | } | |||
128 | } // namespace | |||
129 | ||||
130 | /* | |||
131 | * DirectIOHelper | |||
132 | */ | |||
133 | #ifndef NDEBUG1 | |||
134 | namespace { | |||
135 | ||||
// Returns true when the byte count/offset `off` is an exact multiple of
// `sector_size`. `sector_size` must be non-zero (it is used as a divisor).
136 | bool IsSectorAligned(const size_t off, size_t sector_size) { | |||
137 | return off % sector_size == 0; | |||
138 | } | |||
139 | ||||
// Pointer overload: returns true when the address held in `ptr`, converted to
// uintptr_t, is an exact multiple of `sector_size` (which must be non-zero).
140 | bool IsSectorAligned(const void* ptr, size_t sector_size) { | |||
141 | return uintptr_t(ptr) % sector_size == 0; | |||
142 | } | |||
143 | ||||
144 | } | |||
145 | #endif | |||
146 | ||||
147 | /* | |||
148 | * PosixSequentialFile | |||
149 | */ | |||
// Stores the file name, the FILE* stream and the descriptor, copies
// options.use_direct_reads into use_direct_io_, and queries the logical
// sector size for `fd` via GetLogicalBufferSize(). The assert states that
// direct reads and mmap reads must not both be requested.
150 | PosixSequentialFile::PosixSequentialFile(const std::string& fname, FILE* file, | |||
151 | int fd, const EnvOptions& options) | |||
152 | : filename_(fname), | |||
153 | file_(file), | |||
154 | fd_(fd), | |||
155 | use_direct_io_(options.use_direct_reads), | |||
156 | logical_sector_size_(GetLogicalBufferSize(fd_)) { | |||
157 | assert(!options.use_direct_reads || !options.use_mmap_reads)(static_cast<void> (0)); | |||
158 | } | |||
159 | ||||
// Releases the underlying handle: fclose(file_) when direct I/O is off,
// close(fd_) when direct I/O is on. Return values of both calls are ignored.
160 | PosixSequentialFile::~PosixSequentialFile() { | |||
161 | if (!use_direct_io()) { | |||
162 | assert(file_)(static_cast<void> (0)); | |||
163 | fclose(file_); | |||
164 | } else { | |||
165 | assert(fd_)(static_cast<void> (0)); | |||
166 | close(fd_); | |||
167 | } | |||
168 | } | |||
169 | ||||
// Buffered sequential read (asserted to be used only when direct I/O is off).
//
// Reads up to `n` bytes from file_ into `scratch` with fread_unlocked(),
// retrying while nothing was read, the stream error flag is set and errno is
// EINTR. `*result` is set to the `r` bytes actually placed in `scratch`.
// A short read at end-of-file is not an error: the stream state is cleared
// and an OK status is returned. A short read without EOF returns an IOError
// built from errno.
170 | Status PosixSequentialFile::Read(size_t n, Slice* result, char* scratch) { | |||
171 | assert(result != nullptr && !use_direct_io())(static_cast<void> (0)); | |||
172 | Status s; | |||
173 | size_t r = 0; | |||
174 | do { | |||
175 | r = fread_unlocked(scratch, 1, n, file_); | |||
176 | } while (r == 0 && ferror(file_) && errno(*__errno_location ()) == EINTR4); | |||
177 | *result = Slice(scratch, r); | |||
178 | if (r < n) { | |||
179 | if (feof(file_)) { | |||
180 | // We leave status as ok if we hit the end of the file | |||
181 | // We also clear the error so that the reads can continue | |||
182 | // if a new data is written to the file | |||
183 | clearerr(file_); | |||
184 | } else { | |||
185 | // A partial read with an error: return a non-ok status | |||
186 | s = IOError("While reading file sequentially", filename_, errno(*__errno_location ())); | |||
187 | } | |||
188 | } | |||
189 | return s; | |||
190 | } | |||
191 | ||||
// Direct-I/O read of up to `n` bytes at `offset` into `scratch` using pread().
//
// The asserts state that direct I/O is enabled and that `offset`, `n` and
// `scratch` are all aligned to GetRequiredBufferAlignment().
// The loop keeps calling pread() until `n` bytes are read, retrying on EINTR,
// and stops early on a 0/negative return or when a read returns a byte count
// that is not a multiple of the required alignment.
// On error (last pread() < 0) an IOError built from errno is returned and
// `*result` is empty; otherwise `*result` covers the n - left bytes read.
// Note that `offset` is advanced inside the loop, so the offset printed in
// the error message is the position of the failing pread(), not the
// caller-supplied one.
192 | Status PosixSequentialFile::PositionedRead(uint64_t offset, size_t n, | |||
193 | Slice* result, char* scratch) { | |||
194 | assert(use_direct_io())(static_cast<void> (0)); | |||
195 | assert(IsSectorAligned(offset, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
196 | assert(IsSectorAligned(n, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
197 | assert(IsSectorAligned(scratch, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
198 | ||||
199 | Status s; | |||
200 | ssize_t r = -1; | |||
201 | size_t left = n; | |||
202 | char* ptr = scratch; | |||
203 | while (left > 0) { | |||
204 | r = pread(fd_, ptr, left, static_cast<off_t>(offset)); | |||
205 | if (r <= 0) { | |||
206 | if (r == -1 && errno(*__errno_location ()) == EINTR4) { | |||
207 | continue; | |||
208 | } | |||
209 | break; | |||
210 | } | |||
211 | ptr += r; | |||
212 | offset += r; | |||
213 | left -= r; | |||
214 | if (r % static_cast<ssize_t>(GetRequiredBufferAlignment()) != 0) { | |||
215 | // Bytes reads don't fill sectors. Should only happen at the end | |||
216 | // of the file. | |||
217 | break; | |||
218 | } | |||
219 | } | |||
220 | if (r < 0) { | |||
221 | // An error: return a non-ok status | |||
222 | s = IOError( | |||
223 | "While pread " + ToString(n) + " bytes from offset " + ToString(offset), | |||
224 | filename_, errno(*__errno_location ())); | |||
225 | } | |||
226 | *result = Slice(scratch, (r < 0) ? 0 : n - left); | |||
227 | return s; | |||
228 | } | |||
229 | ||||
// Advances the stream position of file_ by `n` bytes relative to the current
// position using fseek(). `n` is narrowed to `long int` for the call.
// Returns an IOError built from errno when fseek() reports failure, OK
// otherwise.
230 | Status PosixSequentialFile::Skip(uint64_t n) { | |||
231 | if (fseek(file_, static_cast<long int>(n), SEEK_CUR1)) { | |||
232 | return IOError("While fseek to skip " + ToString(n) + " bytes", filename_, | |||
233 | errno(*__errno_location ())); | |||
234 | } | |||
235 | return Status::OK(); | |||
236 | } | |||
237 | ||||
// Asks the OS to drop cached pages for [offset, offset + length) of this file.
//
// Non-Linux builds: no-op, always OK.
// Linux builds: when direct I/O is off, issues Fadvise(POSIX_FADV_DONTNEED);
// with direct I/O on, nothing is done. Returns OK on success, otherwise an
// IOError.
//
// The error code handed to IOError is the value returned by Fadvise():
// posix_fadvise() reports failure through its return value (an error number)
// and does not set errno, so reading errno here would report a stale code.
238 | Status PosixSequentialFile::InvalidateCache(size_t offset, size_t length) { | |||
239 | #ifndef OS_LINUX1 | |||
240 | (void)offset; | |||
241 | (void)length; | |||
242 | return Status::OK(); | |||
243 | #else | |||
244 | if (!use_direct_io()) { | |||
245 | // free OS pages | |||
246 | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED4); | |||
247 | if (ret != 0) { | |||
248 | return IOError("While fadvise NotNeeded offset " + ToString(offset) + | |||
249 | " len " + ToString(length), | |||
250 | filename_, ret); | |||
251 | } | |||
252 | } | |||
253 | return Status::OK(); | |||
254 | #endif | |||
255 | } | |||
256 | ||||
257 | /* | |||
258 | * PosixRandomAccessFile | |||
259 | */ | |||
260 | #if defined(OS_LINUX1) | |||
// Linux variant. Writes an identifier for the file behind `fd` into `id` and
// returns the number of bytes written, or 0 when no identifier is produced.
//
// The identifier is three varint64 values written back to back: st_dev and
// st_ino from fstat(), and the version obtained with the FS_IOC_GETVERSION
// ioctl. 0 is returned when `max_size` is smaller than
// 3 * kMaxVarint64Length, or when fstat() or the ioctl fails. The sync-point
// callback receives a pointer to `result`, so a test hook can overwrite the
// ioctl outcome before it is checked.
261 | size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) { | |||
262 | if (max_size < kMaxVarint64Length * 3) { | |||
263 | return 0; | |||
264 | } | |||
265 | ||||
266 | struct stat buf; | |||
267 | int result = fstat(fd, &buf); | |||
268 | if (result == -1) { | |||
269 | return 0; | |||
270 | } | |||
271 | ||||
272 | long version = 0; | |||
273 | result = ioctl(fd, FS_IOC_GETVERSION(((2U) << (((0 +8)+8)+14)) | ((('v')) << (0 +8)) | (((1)) << 0) | ((((sizeof(long)))) << ((0 +8)+8) )), &version); | |||
274 | TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result); | |||
275 | if (result == -1) { | |||
276 | return 0; | |||
277 | } | |||
278 | uint64_t uversion = (uint64_t)version; | |||
279 | ||||
280 | char* rid = id; | |||
281 | rid = EncodeVarint64(rid, buf.st_dev); | |||
282 | rid = EncodeVarint64(rid, buf.st_ino); | |||
283 | rid = EncodeVarint64(rid, uversion); | |||
284 | assert(rid >= id)(static_cast<void> (0)); | |||
285 | return static_cast<size_t>(rid - id); | |||
286 | } | |||
287 | #endif | |||
288 | ||||
289 | #if defined(OS_MACOSX) || defined(OS_AIX) | |||
// macOS / AIX variant. Writes st_dev, st_ino and st_gen from fstat() into
// `id` as three consecutive varint64 values and returns the number of bytes
// written. Returns 0 when `max_size` is smaller than 3 * kMaxVarint64Length
// or when fstat() fails.
290 | size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) { | |||
291 | if (max_size < kMaxVarint64Length * 3) { | |||
292 | return 0; | |||
293 | } | |||
294 | ||||
295 | struct stat buf; | |||
296 | int result = fstat(fd, &buf); | |||
297 | if (result == -1) { | |||
298 | return 0; | |||
299 | } | |||
300 | ||||
301 | char* rid = id; | |||
302 | rid = EncodeVarint64(rid, buf.st_dev); | |||
303 | rid = EncodeVarint64(rid, buf.st_ino); | |||
304 | rid = EncodeVarint64(rid, buf.st_gen); | |||
305 | assert(rid >= id)(static_cast<void> (0)); | |||
306 | return static_cast<size_t>(rid - id); | |||
307 | } | |||
308 | #endif | |||
309 | /* | |||
310 | * PosixRandomAccessFile | |||
311 | * | |||
312 | * pread() based random-access | |||
313 | */ | |||
// Stores the file name and descriptor, copies options.use_direct_reads into
// use_direct_io_, and queries the logical sector size for `fd` via
// GetLogicalBufferSize(). The asserts state that direct reads and mmap reads
// are mutually exclusive, and that mmap reads are only expected here when
// pointers are narrower than 8 bytes.
314 | PosixRandomAccessFile::PosixRandomAccessFile(const std::string& fname, int fd, | |||
315 | const EnvOptions& options) | |||
316 | : filename_(fname), | |||
317 | fd_(fd), | |||
318 | use_direct_io_(options.use_direct_reads), | |||
319 | logical_sector_size_(GetLogicalBufferSize(fd_)) { | |||
320 | assert(!options.use_direct_reads || !options.use_mmap_reads)(static_cast<void> (0)); | |||
321 | assert(!options.use_mmap_reads || sizeof(void*) < 8)(static_cast<void> (0)); | |||
322 | } | |||
323 | ||||
// Closes the descriptor; the return value of close() is ignored.
324 | PosixRandomAccessFile::~PosixRandomAccessFile() { close(fd_); } | |||
325 | ||||
// Reads up to `n` bytes at `offset` into `scratch` using pread().
//
// With direct I/O enabled, the asserts state that `offset`, `n` and `scratch`
// are aligned to GetRequiredBufferAlignment().
// The loop repeats pread() until `n` bytes are read, retrying on EINTR. It
// stops early on a 0/negative return, or (direct I/O only) when a read
// returns a byte count that is not a multiple of the required alignment.
// On error (last pread() < 0) an IOError built from errno is returned and
// `*result` is empty; otherwise `*result` covers the n - left bytes read.
// `offset` is advanced inside the loop, so the value in the error message is
// the position of the failing pread().
326 | Status PosixRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result, | |||
327 | char* scratch) const { | |||
328 | if (use_direct_io()) { | |||
329 | assert(IsSectorAligned(offset, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
330 | assert(IsSectorAligned(n, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
331 | assert(IsSectorAligned(scratch, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
332 | } | |||
333 | Status s; | |||
334 | ssize_t r = -1; | |||
335 | size_t left = n; | |||
336 | char* ptr = scratch; | |||
337 | while (left > 0) { | |||
338 | r = pread(fd_, ptr, left, static_cast<off_t>(offset)); | |||
339 | if (r <= 0) { | |||
340 | if (r == -1 && errno(*__errno_location ()) == EINTR4) { | |||
341 | continue; | |||
342 | } | |||
343 | break; | |||
344 | } | |||
345 | ptr += r; | |||
346 | offset += r; | |||
347 | left -= r; | |||
348 | if (use_direct_io() && | |||
349 | r % static_cast<ssize_t>(GetRequiredBufferAlignment()) != 0) { | |||
350 | // Bytes reads don't fill sectors. Should only happen at the end | |||
351 | // of the file. | |||
352 | break; | |||
353 | } | |||
354 | } | |||
355 | if (r < 0) { | |||
356 | // An error: return a non-ok status | |||
357 | s = IOError( | |||
358 | "While pread offset " + ToString(offset) + " len " + ToString(n), | |||
359 | filename_, errno(*__errno_location ())); | |||
360 | } | |||
361 | *result = Slice(scratch, (r < 0) ? 0 : n - left); | |||
362 | return s; | |||
363 | } | |||
364 | ||||
// Hints the OS to read ahead `n` bytes starting at `offset`.
//
// Does nothing (returns OK) when direct I/O is enabled. Otherwise:
//   - Linux guard: calls readahead(fd_, offset, n).
//   - OS_MACOSX:   fills a radvisory struct and issues fcntl(F_RDADVISE);
//                  `n` is narrowed to int for ra_count.
//   - other platforms: `r` stays 0 and OK is returned.
// A -1 result from either call produces an IOError built from errno.
365 | Status PosixRandomAccessFile::Prefetch(uint64_t offset, size_t n) { | |||
366 | Status s; | |||
367 | if (!use_direct_io()) { | |||
368 | ssize_t r = 0; | |||
369 | #ifdef OS_LINUX1 | |||
370 | r = readahead(fd_, offset, n); | |||
371 | #endif | |||
372 | #ifdef OS_MACOSX | |||
373 | radvisory advice; | |||
374 | advice.ra_offset = static_cast<off_t>(offset); | |||
375 | advice.ra_count = static_cast<int>(n); | |||
376 | r = fcntl(fd_, F_RDADVISE, &advice); | |||
377 | #endif | |||
378 | if (r == -1) { | |||
379 | s = IOError("While prefetching offset " + ToString(offset) + " len " + | |||
380 | ToString(n), | |||
381 | filename_, errno(*__errno_location ())); | |||
382 | } | |||
383 | } | |||
384 | return s; | |||
385 | } | |||
386 | ||||
387 | #if defined(OS_LINUX1) || defined(OS_MACOSX) || defined(OS_AIX) | |||
// Delegates to PosixHelper::GetUniqueIdFromFile() for this file's descriptor
// and returns whatever it returns (bytes written into `id`, or 0).
388 | size_t PosixRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { | |||
389 | return PosixHelper::GetUniqueIdFromFile(fd_, id, max_size); | |||
390 | } | |||
391 | #endif | |||
392 | ||||
// Passes an access-pattern hint to the OS through Fadvise().
//
// No-op when direct I/O is enabled. Otherwise each AccessPattern value maps
// to the POSIX_FADV_* constant of the same name and Fadvise() is called with
// offset 0 and length 0. The return value of Fadvise() is ignored. An
// unrecognised pattern trips assert(false) in builds where asserts are live.
393 | void PosixRandomAccessFile::Hint(AccessPattern pattern) { | |||
394 | if (use_direct_io()) { | |||
395 | return; | |||
396 | } | |||
397 | switch (pattern) { | |||
398 | case NORMAL: | |||
399 | Fadvise(fd_, 0, 0, POSIX_FADV_NORMAL0); | |||
400 | break; | |||
401 | case RANDOM: | |||
402 | Fadvise(fd_, 0, 0, POSIX_FADV_RANDOM1); | |||
403 | break; | |||
404 | case SEQUENTIAL: | |||
405 | Fadvise(fd_, 0, 0, POSIX_FADV_SEQUENTIAL2); | |||
406 | break; | |||
407 | case WILLNEED: | |||
408 | Fadvise(fd_, 0, 0, POSIX_FADV_WILLNEED3); | |||
409 | break; | |||
410 | case DONTNEED: | |||
411 | Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED4); | |||
412 | break; | |||
413 | default: | |||
414 | assert(false)(static_cast<void> (0)); | |||
415 | break; | |||
416 | } | |||
417 | } | |||
418 | ||||
// Asks the OS to drop cached pages for [offset, offset + length) of this file.
//
// Returns OK immediately when direct I/O is enabled, and on non-Linux builds.
// On Linux builds it issues Fadvise(POSIX_FADV_DONTNEED) and returns OK on
// success, otherwise an IOError.
//
// The error code handed to IOError is the value returned by Fadvise():
// posix_fadvise() reports failure through its return value (an error number)
// and does not set errno, so reading errno here would report a stale code.
419 | Status PosixRandomAccessFile::InvalidateCache(size_t offset, size_t length) { | |||
420 | if (use_direct_io()) { | |||
421 | return Status::OK(); | |||
422 | } | |||
423 | #ifndef OS_LINUX1 | |||
424 | (void)offset; | |||
425 | (void)length; | |||
426 | return Status::OK(); | |||
427 | #else | |||
428 | // free OS pages | |||
429 | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED4); | |||
430 | if (ret == 0) { | |||
431 | return Status::OK(); | |||
432 | } | |||
433 | return IOError("While fadvise NotNeeded offset " + ToString(offset) + | |||
434 | " len " + ToString(length), | |||
435 | filename_, ret); | |||
436 | #endif | |||
437 | } | |||
438 | ||||
439 | /* | |||
440 | * PosixMmapReadableFile | |||
441 | * | |||
442 | * mmap() based random-access | |||
443 | */ | |||
444 | // base[0,length-1] contains the mmapped contents of the file. | |||
// Records the descriptor, file name, mapped base address and mapped length.
// `options` is only consulted by the asserts (mmap reads on, direct reads
// off); when NDEBUG is defined it is explicitly marked unused.
445 | PosixMmapReadableFile::PosixMmapReadableFile(const int fd, | |||
446 | const std::string& fname, | |||
447 | void* base, size_t length, | |||
448 | const EnvOptions& options) | |||
449 | : fd_(fd), filename_(fname), mmapped_region_(base), length_(length) { | |||
450 | #ifdef NDEBUG1 | |||
451 | (void)options; | |||
452 | #endif | |||
453 | fd_ = fd_ + 0; // suppress the warning for used variables | |||
454 | assert(options.use_mmap_reads)(static_cast<void> (0)); | |||
455 | assert(!options.use_direct_reads)(static_cast<void> (0)); | |||
456 | } | |||
457 | ||||
// Unmaps the region and closes the descriptor. A munmap() failure is only
// reported by printing a message to stdout; close() is called regardless and
// its return value is ignored.
458 | PosixMmapReadableFile::~PosixMmapReadableFile() { | |||
459 | int ret = munmap(mmapped_region_, length_); | |||
460 | if (ret != 0) { | |||
461 | fprintf(stdoutstdout, "failed to munmap %p length %" ROCKSDB_PRIszt"zu" " \n", | |||
462 | mmapped_region_, length_); | |||
463 | } | |||
464 | close(fd_); | |||
465 | } | |||
466 | ||||
// Returns a view of up to `n` bytes of the mapped region starting at `offset`.
// No data is copied and the scratch buffer is unused: `*result` points
// directly into mmapped_region_.
//
//   - offset > length_  : `*result` is emptied and an IOError (EINVAL) is
//                         returned.
//   - range past the end: `n` is clamped so the view ends at length_.
//
// The clamp test is written as `n > length_ - offset` rather than
// `offset + n > length_`: at that point offset <= length_, so the subtraction
// cannot underflow, whereas the addition could wrap around for a very large
// `n` and let an out-of-range view through.
467 | Status PosixMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result, | |||
468 | char* /*scratch*/) const { | |||
469 | Status s; | |||
470 | if (offset > length_) { | |||
471 | *result = Slice(); | |||
472 | return IOError("While mmap read offset " + ToString(offset) + | |||
473 | " larger than file length " + ToString(length_), | |||
474 | filename_, EINVAL22); | |||
475 | } else if (n > length_ - offset) { | |||
476 | n = static_cast<size_t>(length_ - offset); | |||
477 | } | |||
478 | *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n); | |||
479 | return s; | |||
480 | } | |||
481 | ||||
// Asks the OS to drop cached pages for [offset, offset + length) of this file.
//
// Non-Linux builds: no-op, always OK.
// Linux builds: issues Fadvise(POSIX_FADV_DONTNEED) and returns OK on success,
// otherwise an IOError.
//
// The error code handed to IOError is the value returned by Fadvise():
// posix_fadvise() reports failure through its return value (an error number)
// and does not set errno, so reading errno here would report a stale code.
// The message also gains the space that was missing between "len" and the
// length value.
482 | Status PosixMmapReadableFile::InvalidateCache(size_t offset, size_t length) { | |||
483 | #ifndef OS_LINUX1 | |||
484 | (void)offset; | |||
485 | (void)length; | |||
486 | return Status::OK(); | |||
487 | #else | |||
488 | // free OS pages | |||
489 | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED4); | |||
490 | if (ret == 0) { | |||
491 | return Status::OK(); | |||
492 | } | |||
493 | return IOError("While fadvise not needed. Offset " + ToString(offset) + | |||
494 | " len " + ToString(length), | |||
495 | filename_, ret); | |||
496 | #endif | |||
497 | } | |||
498 | ||||
499 | /* | |||
500 | * PosixMmapFile | |||
501 | * | |||
502 | * We preallocate up to an extra megabyte and use memcpy to append new | |||
503 | * data to the file. This is safe since we either properly close the | |||
504 | * file before reading from it, or for log files, the reading code | |||
505 | * knows enough to skip zero suffixes. | |||
506 | */ | |||
// Unmaps the currently mapped window, if any, and prepares for the next one.
//
// When base_ is null there is nothing mapped and OK is returned. Otherwise
// the [base_, limit_) window is unmapped, file_offset_ is advanced by the
// window size, all four window pointers are reset to null, and map_size_ is
// doubled while it is still below 1 MiB.
//
// On munmap() failure an IOError is returned and no state is changed. The
// error code comes from errno: munmap() returns -1 on failure and reports the
// cause through errno, so its return value is not a usable error number.
507 | Status PosixMmapFile::UnmapCurrentRegion() { | |||
508 | TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0", rocksdb_kill_odds); | |||
509 | if (base_ != nullptr) { | |||
510 | int munmap_status = munmap(base_, limit_ - base_); | |||
511 | if (munmap_status != 0) { | |||
512 | return IOError("While munmap", filename_, errno(*__errno_location ())); | |||
513 | } | |||
514 | file_offset_ += limit_ - base_; | |||
515 | base_ = nullptr; | |||
516 | limit_ = nullptr; | |||
517 | last_sync_ = nullptr; | |||
518 | dst_ = nullptr; | |||
519 | ||||
520 | // Increase the amount we map the next time, but capped at 1MB | |||
521 | if (map_size_ < (1 << 20)) { | |||
522 | map_size_ *= 2; | |||
523 | } | |||
524 | } | |||
525 | return Status::OK(); | |||
526 | } | |||
527 | ||||
// Maps the next map_size_ bytes of the file, starting at file_offset_, as a
// shared read/write window. Asserted precondition: no window is currently
// mapped (base_ == nullptr).
//
// When allow_fallocate_ is set, space for the window is reserved first with
// fallocate(), falling back to posix_fallocate() if that fails; a failure of
// both returns Status::IOError. A failed mmap() also returns Status::IOError.
// On success base_, dst_ and last_sync_ point at the start of the window and
// limit_ one past its end.
//
// Builds without the fallocate guard return Status::NotSupported.
// NOTE(review): the first kill-point label reads "UnmapCurrentRegion:0"
// although this is MapNewRegion -- confirm whether that is intentional.
528 | Status PosixMmapFile::MapNewRegion() { | |||
529 | #ifdef ROCKSDB_FALLOCATE_PRESENT1 | |||
530 | assert(base_ == nullptr)(static_cast<void> (0)); | |||
531 | TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0", rocksdb_kill_odds); | |||
532 | // we can't fallocate with FALLOC_FL_KEEP_SIZE here | |||
533 | if (allow_fallocate_) { | |||
534 | IOSTATS_TIMER_GUARD(allocate_nanos)PerfStepTimer iostats_step_timer_allocate_nanos(&(iostats_context .allocate_nanos)); iostats_step_timer_allocate_nanos.Start();; | |||
535 | int alloc_status = fallocate(fd_, 0, file_offset_, map_size_); | |||
536 | if (alloc_status != 0) { | |||
537 | // fallback to posix_fallocate | |||
538 | alloc_status = posix_fallocate(fd_, file_offset_, map_size_); | |||
539 | } | |||
540 | if (alloc_status != 0) { | |||
541 | return Status::IOError("Error allocating space to file : " + filename_ + | |||
542 | "Error : " + strerror(alloc_status)); | |||
543 | } | |||
544 | } | |||
545 | ||||
546 | TEST_KILL_RANDOM("PosixMmapFile::Append:1", rocksdb_kill_odds); | |||
547 | void* ptr = mmap(nullptr, map_size_, PROT_READ0x1 | PROT_WRITE0x2, MAP_SHARED0x01, fd_, | |||
548 | file_offset_); | |||
549 | if (ptr == MAP_FAILED((void *) -1)) { | |||
550 | return Status::IOError("MMap failed on " + filename_); | |||
551 | } | |||
552 | TEST_KILL_RANDOM("PosixMmapFile::Append:2", rocksdb_kill_odds); | |||
553 | ||||
554 | base_ = reinterpret_cast<char*>(ptr); | |||
555 | limit_ = base_ + map_size_; | |||
556 | dst_ = base_; | |||
557 | last_sync_ = base_; | |||
558 | return Status::OK(); | |||
559 | #else | |||
560 | return Status::NotSupported("This platform doesn't support fallocate()"); | |||
561 | #endif | |||
562 | } | |||
563 | ||||
// Synchronously flushes the bytes written to the current window since the
// previous sync, i.e. the range [last_sync_, dst_).
//
// Returns OK without any system call when nothing new was written. Otherwise
// the range is widened to page boundaries (p1 = page holding the first
// unsynced byte, p2 = page holding the last written byte) and msync(MS_SYNC)
// is issued over p2 - p1 + page_size_ bytes starting at base_ + p1.
// last_sync_ is advanced to dst_ before the msync() call, so it moves forward
// even if msync() then fails and an IOError (from errno) is returned.
564 | Status PosixMmapFile::Msync() { | |||
565 | if (dst_ == last_sync_) { | |||
566 | return Status::OK(); | |||
567 | } | |||
568 | // Find the beginnings of the pages that contain the first and last | |||
569 | // bytes to be synced. | |||
570 | size_t p1 = TruncateToPageBoundary(last_sync_ - base_); | |||
571 | size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); | |||
572 | last_sync_ = dst_; | |||
573 | TEST_KILL_RANDOM("PosixMmapFile::Msync:0", rocksdb_kill_odds); | |||
574 | if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC4) < 0) { | |||
575 | return IOError("While msync", filename_, errno(*__errno_location ())); | |||
576 | } | |||
577 | return Status::OK(); | |||
578 | } | |||
579 | ||||
// Sets up an mmap-backed writable file with no window mapped yet: all window
// pointers are null and file_offset_ is 0. The initial map_size_ is 65536
// passed through Roundup() with page_size. When the fallocate guard is
// active the two fallocate options are copied from `options`; otherwise
// `options` is marked unused.
// The asserts state that page_size is a power of two (or zero), that mmap
// writes are enabled and that direct writes are disabled.
580 | PosixMmapFile::PosixMmapFile(const std::string& fname, int fd, size_t page_size, | |||
581 | const EnvOptions& options) | |||
582 | : filename_(fname), | |||
583 | fd_(fd), | |||
584 | page_size_(page_size), | |||
585 | map_size_(Roundup(65536, page_size)), | |||
586 | base_(nullptr), | |||
587 | limit_(nullptr), | |||
588 | dst_(nullptr), | |||
589 | last_sync_(nullptr), | |||
590 | file_offset_(0) { | |||
591 | #ifdef ROCKSDB_FALLOCATE_PRESENT1 | |||
592 | allow_fallocate_ = options.allow_fallocate; | |||
593 | fallocate_with_keep_size_ = options.fallocate_with_keep_size; | |||
594 | #else | |||
595 | (void)options; | |||
596 | #endif | |||
597 | assert((page_size & (page_size - 1)) == 0)(static_cast<void> (0)); | |||
598 | assert(options.use_mmap_writes)(static_cast<void> (0)); | |||
599 | assert(!options.use_direct_writes)(static_cast<void> (0)); | |||
600 | } | |||
601 | ||||
// Closes the file if it is still open (Close() sets fd_ to -1). The call is
// qualified with the class name and the Status returned by Close() is
// discarded.
602 | PosixMmapFile::~PosixMmapFile() { | |||
603 | if (fd_ >= 0) { | |||
604 | PosixMmapFile::Close(); | |||
605 | } | |||
606 | } | |||
607 | ||||
// Appends `data` by copying it into the mapped window, remapping as needed.
//
// Each iteration computes the free space left in the window. If there is
// none, the current window is unmapped and a new one is mapped; a failure of
// either step is returned immediately. Then min(left, avail) bytes are copied
// to dst_ and the cursors advance. Returns OK once all bytes are copied (or
// immediately for empty data).
//
// Two details worth knowing when reading the loop:
//   - `avail` is not refreshed after a remap, so on that iteration n is 0 and
//     the memcpy copies nothing; the following iteration sees the new window.
//   - The analyzer warning quoted at the top of this report (line 629) points
//     at the memcpy: assert(dst_) expands to a no-op in this build, so the
//     analyzer cannot rule out a null dst_ reaching memcpy.
608 | Status PosixMmapFile::Append(const Slice& data) { | |||
609 | const char* src = data.data(); | |||
610 | size_t left = data.size(); | |||
611 | while (left > 0) { | |||
| ||||
612 | assert(base_ <= dst_)(static_cast<void> (0)); | |||
613 | assert(dst_ <= limit_)(static_cast<void> (0)); | |||
614 | size_t avail = limit_ - dst_; | |||
615 | if (avail == 0) { | |||
616 | Status s = UnmapCurrentRegion(); | |||
617 | if (!s.ok()) { | |||
618 | return s; | |||
619 | } | |||
620 | s = MapNewRegion(); | |||
621 | if (!s.ok()) { | |||
622 | return s; | |||
623 | } | |||
624 | TEST_KILL_RANDOM("PosixMmapFile::Append:0", rocksdb_kill_odds); | |||
625 | } | |||
626 | ||||
627 | size_t n = (left <= avail) ? left : avail; | |||
628 | assert(dst_)(static_cast<void> (0)); | |||
629 | memcpy(dst_, src, n); | |||
| ||||
630 | dst_ += n; | |||
631 | src += n; | |||
632 | left -= n; | |||
633 | } | |||
634 | return Status::OK(); | |||
635 | } | |||
636 | ||||
// Unmaps the current window, trims the unused tail of the file and closes the
// descriptor.
//
// `unused` (free bytes left in the window) is captured before unmapping,
// because UnmapCurrentRegion() resets the window pointers. If unmapping
// fails, the status is replaced by an IOError built from errno and the trim
// is skipped. Otherwise, when there is unused space, the file is truncated to
// file_offset_ - unused. close() is always attempted; its failure is reported
// only if no earlier error was recorded. Finally fd_ is set to -1 and the
// window pointers base_/limit_ are cleared.
637 | Status PosixMmapFile::Close() { | |||
638 | Status s; | |||
639 | size_t unused = limit_ - dst_; | |||
640 | ||||
641 | s = UnmapCurrentRegion(); | |||
642 | if (!s.ok()) { | |||
643 | s = IOError("While closing mmapped file", filename_, errno(*__errno_location ())); | |||
644 | } else if (unused > 0) { | |||
645 | // Trim the extra space at the end of the file | |||
646 | if (ftruncate(fd_, file_offset_ - unused) < 0) { | |||
647 | s = IOError("While ftruncating mmaped file", filename_, errno(*__errno_location ())); | |||
648 | } | |||
649 | } | |||
650 | ||||
651 | if (close(fd_) < 0) { | |||
652 | if (s.ok()) { | |||
653 | s = IOError("While closing mmapped file", filename_, errno(*__errno_location ())); | |||
654 | } | |||
655 | } | |||
656 | ||||
657 | fd_ = -1; | |||
658 | base_ = nullptr; | |||
659 | limit_ = nullptr; | |||
660 | return s; | |||
661 | } | |||
662 | ||||
// No-op: performs no work and always returns OK.
663 | Status PosixMmapFile::Flush() { return Status::OK(); } | |||
664 | ||||
// Calls fdatasync() on the descriptor, then Msync() on the mapped window.
// Returns an IOError built from errno if fdatasync() fails (Msync() is then
// not attempted); otherwise returns the status of Msync().
665 | Status PosixMmapFile::Sync() { | |||
666 | if (fdatasync(fd_) < 0) { | |||
667 | return IOError("While fdatasync mmapped file", filename_, errno(*__errno_location ())); | |||
668 | } | |||
669 | ||||
670 | return Msync(); | |||
671 | } | |||
672 | ||||
673 | /** | |||
674 | * Flush data as well as metadata to stable storage. | |||
675 | */ | |||
// Calls fsync() on the descriptor, then Msync() on the mapped window.
// Returns an IOError built from errno if fsync() fails (Msync() is then not
// attempted); otherwise returns the status of Msync().
676 | Status PosixMmapFile::Fsync() { | |||
677 | if (fsync(fd_) < 0) { | |||
678 | return IOError("While fsync mmaped file", filename_, errno(*__errno_location ())); | |||
679 | } | |||
680 | ||||
681 | return Msync(); | |||
682 | } | |||
683 | ||||
684 | /** | |||
685 | * Get the size of valid data in the file. This will not match the | |||
686 | * size that is returned from the filesystem because we use mmap | |||
687 | * to extend file by map_size every time. | |||
688 | */ | |||
// Returns file_offset_ (bytes covered by previously unmapped windows) plus
// the number of bytes written into the current window (dst_ - base_).
689 | uint64_t PosixMmapFile::GetFileSize() { | |||
690 | size_t used = dst_ - base_; | |||
691 | return file_offset_ + used; | |||
692 | } | |||
693 | ||||
// Asks the OS to drop cached pages for [offset, offset + length) of this file.
//
// Non-Linux builds: no-op, always OK.
// Linux builds: issues Fadvise(POSIX_FADV_DONTNEED) and returns OK on success,
// otherwise an IOError.
//
// The error code handed to IOError is the value returned by Fadvise():
// posix_fadvise() reports failure through its return value (an error number)
// and does not set errno, so reading errno here would report a stale code.
694 | Status PosixMmapFile::InvalidateCache(size_t offset, size_t length) { | |||
695 | #ifndef OS_LINUX1 | |||
696 | (void)offset; | |||
697 | (void)length; | |||
698 | return Status::OK(); | |||
699 | #else | |||
700 | // free OS pages | |||
701 | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED4); | |||
702 | if (ret == 0) { | |||
703 | return Status::OK(); | |||
704 | } | |||
705 | return IOError("While fadvise NotNeeded mmapped file", filename_, ret); | |||
706 | #endif | |||
707 | } | |||
708 | ||||
709 | #ifdef ROCKSDB_FALLOCATE_PRESENT1 | |||
// Preallocates `len` bytes at `offset` with fallocate().
//
// The asserts state that both values fit in off_t. When allow_fallocate_ is
// false no system call is made and OK is returned. Otherwise fallocate() is
// called, with FALLOC_FL_KEEP_SIZE when fallocate_with_keep_size_ is set and
// with mode 0 when it is not. A non-zero result yields an IOError built from
// errno.
710 | Status PosixMmapFile::Allocate(uint64_t offset, uint64_t len) { | |||
711 | assert(offset <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
712 | assert(len <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
713 | TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds); | |||
714 | int alloc_status = 0; | |||
715 | if (allow_fallocate_) { | |||
716 | alloc_status = fallocate( | |||
717 | fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE1 : 0, | |||
718 | static_cast<off_t>(offset), static_cast<off_t>(len)); | |||
719 | } | |||
720 | if (alloc_status == 0) { | |||
721 | return Status::OK(); | |||
722 | } else { | |||
723 | return IOError( | |||
724 | "While fallocate offset " + ToString(offset) + " len " + ToString(len), | |||
725 | filename_, errno(*__errno_location ())); | |||
726 | } | |||
727 | } | |||
728 | #endif | |||
729 | ||||
730 | /* | |||
731 | * PosixWritableFile | |||
732 | * | |||
733 | * Use posix write to write data to a file. | |||
734 | */ | |||
// Stores the file name and descriptor, copies options.use_direct_writes into
// use_direct_io_, starts filesize_ at 0 and queries the logical sector size
// for `fd` via GetLogicalBufferSize(). When the fallocate guard is active the
// two fallocate options are copied from `options`. The assert states that
// mmap writes must be disabled for this class.
735 | PosixWritableFile::PosixWritableFile(const std::string& fname, int fd, | |||
736 | const EnvOptions& options) | |||
737 | : filename_(fname), | |||
738 | use_direct_io_(options.use_direct_writes), | |||
739 | fd_(fd), | |||
740 | filesize_(0), | |||
741 | logical_sector_size_(GetLogicalBufferSize(fd_)) { | |||
742 | #ifdef ROCKSDB_FALLOCATE_PRESENT1 | |||
743 | allow_fallocate_ = options.allow_fallocate; | |||
744 | fallocate_with_keep_size_ = options.fallocate_with_keep_size; | |||
745 | #endif | |||
746 | assert(!options.use_mmap_writes)(static_cast<void> (0)); | |||
747 | } | |||
748 | ||||
// Closes the file if the descriptor is still non-negative. The call is
// qualified with the class name and the Status returned by Close() is
// discarded.
749 | PosixWritableFile::~PosixWritableFile() { | |||
750 | if (fd_ >= 0) { | |||
751 | PosixWritableFile::Close(); | |||
752 | } | |||
753 | } | |||
754 | ||||
// Writes all of `data` at the descriptor's current position using write().
//
// With direct I/O enabled, the asserts state that the size and address of
// `data` are aligned to GetRequiredBufferAlignment().
// The loop handles partial writes by advancing past the bytes written and
// retries when write() fails with EINTR. Any other failure returns an IOError
// built from errno; filesize_ is not updated in that case, even if part of
// the data was already written. On success filesize_ grows by data.size().
755 | Status PosixWritableFile::Append(const Slice& data) { | |||
756 | if (use_direct_io()) { | |||
757 | assert(IsSectorAligned(data.size(), GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
758 | assert(IsSectorAligned(data.data(), GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
759 | } | |||
760 | const char* src = data.data(); | |||
761 | size_t left = data.size(); | |||
762 | while (left != 0) { | |||
763 | ssize_t done = write(fd_, src, left); | |||
764 | if (done < 0) { | |||
765 | if (errno(*__errno_location ()) == EINTR4) { | |||
766 | continue; | |||
767 | } | |||
768 | return IOError("While appending to file", filename_, errno(*__errno_location ())); | |||
769 | } | |||
770 | left -= done; | |||
771 | src += done; | |||
772 | } | |||
773 | filesize_ += data.size(); | |||
774 | return Status::OK(); | |||
775 | } | |||
776 | ||||
// Writes all of `data` at the explicit file position `offset` using pwrite().
//
// With direct I/O enabled, the asserts state that `offset` and the size and
// address of `data` are aligned to GetRequiredBufferAlignment(); `offset` is
// also asserted to fit in off_t.
// The loop handles partial writes by advancing both the source pointer and
// `offset`, and retries when pwrite() fails with EINTR. Any other failure
// returns an IOError built from errno (the offset in the message is the
// position of the failing pwrite()). On success filesize_ is set to the
// offset just past the last byte written.
777 | Status PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset) { | |||
778 | if (use_direct_io()) { | |||
779 | assert(IsSectorAligned(offset, GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
780 | assert(IsSectorAligned(data.size(), GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
781 | assert(IsSectorAligned(data.data(), GetRequiredBufferAlignment()))(static_cast<void> (0)); | |||
782 | } | |||
783 | assert(offset <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
784 | const char* src = data.data(); | |||
785 | size_t left = data.size(); | |||
786 | while (left != 0) { | |||
787 | ssize_t done = pwrite(fd_, src, left, static_cast<off_t>(offset)); | |||
788 | if (done < 0) { | |||
789 | if (errno(*__errno_location ()) == EINTR4) { | |||
790 | continue; | |||
791 | } | |||
792 | return IOError("While pwrite to file at offset " + ToString(offset), | |||
793 | filename_, errno(*__errno_location ())); | |||
794 | } | |||
795 | left -= done; | |||
796 | offset += done; | |||
797 | src += done; | |||
798 | } | |||
799 | filesize_ = offset; | |||
800 | return Status::OK(); | |||
801 | } | |||
802 | ||||
// Sets the file length to `size` with ftruncate(). On success filesize_ is
// updated to `size` and OK is returned; on failure filesize_ is left as it
// was and an IOError built from errno is returned.
803 | Status PosixWritableFile::Truncate(uint64_t size) { | |||
804 | Status s; | |||
805 | int r = ftruncate(fd_, size); | |||
806 | if (r < 0) { | |||
807 | s = IOError("While ftruncate file to size " + ToString(size), filename_, | |||
808 | errno(*__errno_location ())); | |||
809 | } else { | |||
810 | filesize_ = size; | |||
811 | } | |||
812 | return s; | |||
813 | } | |||
814 | ||||
815 | Status PosixWritableFile::Close() { | |||
816 | Status s; | |||
817 | ||||
818 | size_t block_size; | |||
819 | size_t last_allocated_block; | |||
820 | GetPreallocationStatus(&block_size, &last_allocated_block); | |||
821 | if (last_allocated_block > 0) { | |||
822 | // trim the extra space preallocated at the end of the file | |||
823 | // NOTE(ljin): we probably don't want to surface failure as an IOError, | |||
824 | // but it will be nice to log these errors. | |||
825 | int dummy __attribute__((__unused__)); | |||
826 | dummy = ftruncate(fd_, filesize_); | |||
827 | #if defined(ROCKSDB_FALLOCATE_PRESENT1) && defined(FALLOC_FL_PUNCH_HOLE2) && \ | |||
828 | !defined(TRAVIS) | |||
829 | // in some file systems, ftruncate only trims trailing space if the | |||
830 | // new file size is smaller than the current size. Calling fallocate | |||
831 | // with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused | |||
832 | // blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following | |||
833 | // filesystems: | |||
834 | // XFS (since Linux 2.6.38) | |||
835 | // ext4 (since Linux 3.0) | |||
836 | // Btrfs (since Linux 3.7) | |||
837 | // tmpfs (since Linux 3.5) | |||
838 | // We ignore error since failure of this operation does not affect | |||
839 | // correctness. | |||
840 | // TRAVIS - this code does not work on TRAVIS filesystems. | |||
841 | // the FALLOC_FL_KEEP_SIZE option is expected to not change the size | |||
842 | // of the file, but it does. Simple strace report will show that. | |||
843 | // While we work with Travis-CI team to figure out if this is a | |||
844 | // quirk of Docker/AUFS, we will comment this out. | |||
845 | struct stat file_stats; | |||
846 | int result = fstat(fd_, &file_stats); | |||
847 | // After ftruncate, we check whether ftruncate has the correct behavior. | |||
848 | // If not, we should hack it with FALLOC_FL_PUNCH_HOLE | |||
849 | if (result == 0 && | |||
850 | (file_stats.st_size + file_stats.st_blksize - 1) / | |||
851 | file_stats.st_blksize != | |||
852 | file_stats.st_blocks / (file_stats.st_blksize / 512)) { | |||
853 | IOSTATS_TIMER_GUARD(allocate_nanos)PerfStepTimer iostats_step_timer_allocate_nanos(&(iostats_context .allocate_nanos)); iostats_step_timer_allocate_nanos.Start();; | |||
854 | if (allow_fallocate_) { | |||
855 | fallocate(fd_, FALLOC_FL_KEEP_SIZE1 | FALLOC_FL_PUNCH_HOLE2, filesize_, | |||
856 | block_size * last_allocated_block - filesize_); | |||
857 | } | |||
858 | } | |||
859 | #endif | |||
860 | } | |||
861 | ||||
862 | if (close(fd_) < 0) { | |||
863 | s = IOError("While closing file after writing", filename_, errno(*__errno_location ())); | |||
864 | } | |||
865 | fd_ = -1; | |||
866 | return s; | |||
867 | } | |||
868 | ||||
869 | // write out the cached data to the OS cache | |||
870 | Status PosixWritableFile::Flush() { return Status::OK(); } | |||
871 | ||||
872 | Status PosixWritableFile::Sync() { | |||
873 | if (fdatasync(fd_) < 0) { | |||
874 | return IOError("While fdatasync", filename_, errno(*__errno_location ())); | |||
875 | } | |||
876 | return Status::OK(); | |||
877 | } | |||
878 | ||||
879 | Status PosixWritableFile::Fsync() { | |||
880 | if (fsync(fd_) < 0) { | |||
881 | return IOError("While fsync", filename_, errno(*__errno_location ())); | |||
882 | } | |||
883 | return Status::OK(); | |||
884 | } | |||
885 | ||||
886 | bool PosixWritableFile::IsSyncThreadSafe() const { return true; } | |||
887 | ||||
888 | uint64_t PosixWritableFile::GetFileSize() { return filesize_; } | |||
889 | ||||
890 | void PosixWritableFile::SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) { | |||
891 | #ifdef OS_LINUX1 | |||
892 | // Suppress Valgrind "Unimplemented functionality" error. | |||
893 | #ifndef ROCKSDB_VALGRIND_RUN | |||
894 | if (hint == write_hint_) { | |||
895 | return; | |||
896 | } | |||
897 | if (fcntl(fd_, F_SET_RW_HINT(1024 + 12), &hint) == 0) { | |||
898 | write_hint_ = hint; | |||
899 | } | |||
900 | #else | |||
901 | (void)hint; | |||
902 | #endif // ROCKSDB_VALGRIND_RUN | |||
903 | #else | |||
904 | (void)hint; | |||
905 | #endif // OS_LINUX | |||
906 | } | |||
907 | ||||
908 | Status PosixWritableFile::InvalidateCache(size_t offset, size_t length) { | |||
909 | if (use_direct_io()) { | |||
910 | return Status::OK(); | |||
911 | } | |||
912 | #ifndef OS_LINUX1 | |||
913 | (void)offset; | |||
914 | (void)length; | |||
915 | return Status::OK(); | |||
916 | #else | |||
917 | // free OS pages | |||
918 | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED4); | |||
919 | if (ret == 0) { | |||
920 | return Status::OK(); | |||
921 | } | |||
922 | return IOError("While fadvise NotNeeded", filename_, errno(*__errno_location ())); | |||
923 | #endif | |||
924 | } | |||
925 | ||||
926 | #ifdef ROCKSDB_FALLOCATE_PRESENT1 | |||
927 | Status PosixWritableFile::Allocate(uint64_t offset, uint64_t len) { | |||
928 | assert(offset <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
929 | assert(len <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
930 | TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds); | |||
931 | IOSTATS_TIMER_GUARD(allocate_nanos)PerfStepTimer iostats_step_timer_allocate_nanos(&(iostats_context .allocate_nanos)); iostats_step_timer_allocate_nanos.Start();; | |||
932 | int alloc_status = 0; | |||
933 | if (allow_fallocate_) { | |||
934 | alloc_status = fallocate( | |||
935 | fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE1 : 0, | |||
936 | static_cast<off_t>(offset), static_cast<off_t>(len)); | |||
937 | } | |||
938 | if (alloc_status == 0) { | |||
939 | return Status::OK(); | |||
940 | } else { | |||
941 | return IOError( | |||
942 | "While fallocate offset " + ToString(offset) + " len " + ToString(len), | |||
943 | filename_, errno(*__errno_location ())); | |||
944 | } | |||
945 | } | |||
946 | #endif | |||
947 | ||||
948 | #ifdef ROCKSDB_RANGESYNC_PRESENT1 | |||
949 | Status PosixWritableFile::RangeSync(uint64_t offset, uint64_t nbytes) { | |||
950 | assert(offset <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
951 | assert(nbytes <= std::numeric_limits<off_t>::max())(static_cast<void> (0)); | |||
952 | if (sync_file_range(fd_, static_cast<off_t>(offset), | |||
953 | static_cast<off_t>(nbytes), SYNC_FILE_RANGE_WRITE2) == 0) { | |||
954 | return Status::OK(); | |||
955 | } else { | |||
956 | return IOError("While sync_file_range offset " + ToString(offset) + | |||
957 | " bytes " + ToString(nbytes), | |||
958 | filename_, errno(*__errno_location ())); | |||
959 | } | |||
960 | } | |||
961 | #endif | |||
962 | ||||
963 | #ifdef OS_LINUX1 | |||
964 | size_t PosixWritableFile::GetUniqueId(char* id, size_t max_size) const { | |||
965 | return PosixHelper::GetUniqueIdFromFile(fd_, id, max_size); | |||
966 | } | |||
967 | #endif | |||
968 | ||||
969 | /* | |||
970 | * PosixRandomRWFile | |||
971 | */ | |||
972 | ||||
973 | PosixRandomRWFile::PosixRandomRWFile(const std::string& fname, int fd, | |||
974 | const EnvOptions& /*options*/) | |||
975 | : filename_(fname), fd_(fd) {} | |||
976 | ||||
977 | PosixRandomRWFile::~PosixRandomRWFile() { | |||
978 | if (fd_ >= 0) { | |||
979 | Close(); | |||
980 | } | |||
981 | } | |||
982 | ||||
983 | Status PosixRandomRWFile::Write(uint64_t offset, const Slice& data) { | |||
984 | const char* src = data.data(); | |||
985 | size_t left = data.size(); | |||
986 | while (left != 0) { | |||
987 | ssize_t done = pwrite(fd_, src, left, offset); | |||
988 | if (done < 0) { | |||
989 | // error while writing to file | |||
990 | if (errno(*__errno_location ()) == EINTR4) { | |||
991 | // write was interrupted, try again. | |||
992 | continue; | |||
993 | } | |||
994 | return IOError( | |||
995 | "While write random read/write file at offset " + ToString(offset), | |||
996 | filename_, errno(*__errno_location ())); | |||
997 | } | |||
998 | ||||
999 | // Wrote `done` bytes | |||
1000 | left -= done; | |||
1001 | offset += done; | |||
1002 | src += done; | |||
1003 | } | |||
1004 | ||||
1005 | return Status::OK(); | |||
1006 | } | |||
1007 | ||||
1008 | Status PosixRandomRWFile::Read(uint64_t offset, size_t n, Slice* result, | |||
1009 | char* scratch) const { | |||
1010 | size_t left = n; | |||
1011 | char* ptr = scratch; | |||
1012 | while (left > 0) { | |||
1013 | ssize_t done = pread(fd_, ptr, left, offset); | |||
1014 | if (done < 0) { | |||
1015 | // error while reading from file | |||
1016 | if (errno(*__errno_location ()) == EINTR4) { | |||
1017 | // read was interrupted, try again. | |||
1018 | continue; | |||
1019 | } | |||
1020 | return IOError("While reading random read/write file offset " + | |||
1021 | ToString(offset) + " len " + ToString(n), | |||
1022 | filename_, errno(*__errno_location ())); | |||
1023 | } else if (done == 0) { | |||
1024 | // Nothing more to read | |||
1025 | break; | |||
1026 | } | |||
1027 | ||||
1028 | // Read `done` bytes | |||
1029 | ptr += done; | |||
1030 | offset += done; | |||
1031 | left -= done; | |||
1032 | } | |||
1033 | ||||
1034 | *result = Slice(scratch, n - left); | |||
1035 | return Status::OK(); | |||
1036 | } | |||
1037 | ||||
1038 | Status PosixRandomRWFile::Flush() { return Status::OK(); } | |||
1039 | ||||
1040 | Status PosixRandomRWFile::Sync() { | |||
1041 | if (fdatasync(fd_) < 0) { | |||
1042 | return IOError("While fdatasync random read/write file", filename_, errno(*__errno_location ())); | |||
1043 | } | |||
1044 | return Status::OK(); | |||
1045 | } | |||
1046 | ||||
1047 | Status PosixRandomRWFile::Fsync() { | |||
1048 | if (fsync(fd_) < 0) { | |||
1049 | return IOError("While fsync random read/write file", filename_, errno(*__errno_location ())); | |||
1050 | } | |||
1051 | return Status::OK(); | |||
1052 | } | |||
1053 | ||||
1054 | Status PosixRandomRWFile::Close() { | |||
1055 | if (close(fd_) < 0) { | |||
1056 | return IOError("While close random read/write file", filename_, errno(*__errno_location ())); | |||
1057 | } | |||
1058 | fd_ = -1; | |||
1059 | return Status::OK(); | |||
1060 | } | |||
1061 | ||||
1062 | PosixMemoryMappedFileBuffer::~PosixMemoryMappedFileBuffer() { | |||
1063 | // TODO should have error handling though not much we can do... | |||
1064 | munmap(this->base_, length_); | |||
1065 | } | |||
1066 | ||||
1067 | /* | |||
1068 | * PosixDirectory | |||
1069 | */ | |||
1070 | ||||
1071 | PosixDirectory::~PosixDirectory() { close(fd_); } | |||
1072 | ||||
1073 | Status PosixDirectory::Fsync() { | |||
1074 | #ifndef OS_AIX | |||
1075 | if (fsync(fd_) == -1) { | |||
1076 | return IOError("While fsync", "a directory", errno(*__errno_location ())); | |||
1077 | } | |||
1078 | #endif | |||
1079 | return Status::OK(); | |||
1080 | } | |||
1081 | } // namespace rocksdb | |||
1082 | #endif |