/*
 * Source path: module/zfs/vdev_indirect_births.c
 * Blob: 65a57e73604f9b46d065856a6043c07b85e64dfa
 */
/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015 by Delphix. All rights reserved.
 */

#include <sys/dmu_tx.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/vdev_indirect_births.h>

/*
 * Sanity-check an open births handle.
 *
 * With ZFS_DEBUG defined this is a real function that asserts the handle
 * is non-NULL, that its object number, objset, phys pointer and dbuf are
 * all set, and (via EQUIV) that vib_entries is non-NULL exactly when
 * vib_phys->vib_count is greater than zero.  It always returns B_TRUE so
 * that callers can wrap the call in ASSERT().
 *
 * Without ZFS_DEBUG it is a macro that yields B_TRUE; its argument appears
 * only as a sizeof operand and is therefore never evaluated.
 */
#ifdef ZFS_DEBUG
static boolean_t
vdev_indirect_births_verify(vdev_indirect_births_t *vib)
{
	/* Must come first: every check below dereferences vib. */
	ASSERT(vib != NULL);

	ASSERT(vib->vib_object != 0);
	ASSERT(vib->vib_objset != NULL);
	ASSERT(vib->vib_phys != NULL);
	ASSERT(vib->vib_dbuf != NULL);

	/* An in-core entries array exists iff there is at least one entry. */
	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);

	return (B_TRUE);
}
#else
/* Non-debug stand-in: references vib without evaluating it. */
#define	vdev_indirect_births_verify(vib) ((void) sizeof (vib), B_TRUE)
#endif

/*
 * Report the number of entries in the births list, as recorded in
 * vib_phys->vib_count.
 */
uint64_t
vdev_indirect_births_count(vdev_indirect_births_t *vib)
{
	uint64_t nentries;

	ASSERT(vdev_indirect_births_verify(vib));

	nentries = vib->vib_phys->vib_count;
	return (nentries);
}

/*
 * Report the object number stored in the handle (vib_object).
 */
uint64_t
vdev_indirect_births_object(vdev_indirect_births_t *vib)
{
	uint64_t object;

	ASSERT(vdev_indirect_births_verify(vib));

	object = vib->vib_object;
	return (object);
}

/*
 * Size in bytes of the entries array: the entry count taken from
 * vib_phys multiplied by the size of one element of vib_entries.
 */
static uint64_t
vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
{
	uint64_t nentries = vib->vib_phys->vib_count;

	return (nentries * sizeof (vib->vib_entries[0]));
}

/*
 * Tear down a handle: free the in-core entries array (if there are any
 * entries), release the hold on vib_dbuf, clear the handle's fields and
 * free the handle itself.  vib must not be used after this returns.
 */
void
vdev_indirect_births_close(vdev_indirect_births_t *vib)
{
	ASSERT(vdev_indirect_births_verify(vib));

	/*
	 * The array size is derived from vib_phys, so it is computed (and
	 * the array freed) before the buffer is released below.
	 */
	if (vib->vib_phys->vib_count != 0) {
		uint64_t entries_bytes = vdev_indirect_births_size_impl(vib);

		vmem_free(vib->vib_entries, entries_bytes);
		vib->vib_entries = NULL;
	}

	/* The handle itself is passed as the tag for the release. */
	dmu_buf_rele(vib->vib_dbuf, vib);

	/* Scrub the handle before handing its memory back. */
	vib->vib_phys = NULL;
	vib->vib_dbuf = NULL;
	vib->vib_object = 0;
	vib->vib_objset = NULL;

	kmem_free(vib, sizeof (*vib));
}

/*
 * Allocate a new DMU object in os to back a births list and return the
 * value dmu_object_alloc() hands back.  The tx must be a syncing tx
 * (asserted).
 *
 * The object is requested with DMU_OTN_UINT64_METADATA, a size of
 * SPA_OLD_MAXBLOCKSIZE, and a second DMU_OTN_UINT64_METADATA paired with
 * sizeof (vdev_indirect_birth_phys_t).
 */
uint64_t
vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
{
	uint64_t object;

	ASSERT(dmu_tx_is_syncing(tx));

	object = dmu_object_alloc(os,
	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
	    tx);
	return (object);
}

/*
 * Open the births object births_object in os and return a freshly
 * allocated handle for it.
 *
 * The handle takes a bonus-buffer hold (tagged with the handle itself)
 * and points vib_phys at that buffer's data.  If the phys structure
 * reports a non-zero entry count, the entries are read from offset 0 of
 * the object into a newly allocated in-core array.  Failures of
 * dmu_bonus_hold() or dmu_read() trip VERIFY0.
 */
vdev_indirect_births_t *
vdev_indirect_births_open(objset_t *os, uint64_t births_object)
{
	vdev_indirect_births_t *vib;

	vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
	vib->vib_object = births_object;
	vib->vib_objset = os;

	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
	vib->vib_phys = vib->vib_dbuf->db_data;

	/* With no entries, vib_entries stays NULL from kmem_zalloc(). */
	if (vib->vib_phys->vib_count != 0) {
		uint64_t births_size = vdev_indirect_births_size_impl(vib);

		vib->vib_entries = vmem_alloc(births_size, KM_SLEEP);
		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
	}

	ASSERT(vdev_indirect_births_verify(vib));

	return (vib);
}

/*
 * Free the births object `object` in objset `os` under transaction `tx`.
 * This takes an object number rather than an open handle.  A non-zero
 * return from dmu_object_free() trips VERIFY0.
 */
void
vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	VERIFY0(dmu_object_free(os, object, tx));
}

/*
 * Append one entry, { max_offset, txg }, to the births list.
 *
 * The entry is written to the object at the current end of the entries
 * (byte offset == current array size), the count in vib_phys is bumped,
 * and the in-core array is replaced by a copy one element larger with the
 * new entry in its last slot.  Must be called with a syncing tx, from
 * pool sync context (both asserted).
 */
void
vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
{
	vdev_indirect_birth_entry_phys_t vibe;
	vdev_indirect_birth_entry_phys_t *grown;
	uint64_t prev_bytes;
	uint64_t grown_bytes;
	uint64_t slot;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
	ASSERT(vdev_indirect_births_verify(vib));

	/* vib_phys is modified below, so dirty its buffer first. */
	dmu_buf_will_dirty(vib->vib_dbuf, tx);

	vibe.vibe_phys_birth_txg = txg;
	vibe.vibe_offset = max_offset;

	/* The pre-increment count is the index the new entry will occupy. */
	slot = vib->vib_phys->vib_count;
	prev_bytes = vdev_indirect_births_size_impl(vib);
	dmu_write(vib->vib_objset, vib->vib_object, prev_bytes, sizeof (vibe),
	    &vibe, tx);

	vib->vib_phys->vib_count++;
	grown_bytes = vdev_indirect_births_size_impl(vib);

	/* Replace the in-core array with one that has room for the entry. */
	grown = vmem_alloc(grown_bytes, KM_SLEEP);
	if (prev_bytes != 0) {
		memcpy(grown, vib->vib_entries, prev_bytes);
		vmem_free(vib->vib_entries, prev_bytes);
	}
	grown[slot] = vibe;
	vib->vib_entries = grown;
}

/*
 * Return the vibe_phys_birth_txg of the final entry in the in-core
 * array.  The list must not be empty (asserted).
 */
uint64_t
vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
{
	uint64_t last_idx;

	ASSERT(vdev_indirect_births_verify(vib));
	ASSERT(vib->vib_phys->vib_count > 0);

	last_idx = vib->vib_phys->vib_count - 1;
	return (vib->vib_entries[last_idx].vibe_phys_birth_txg);
}

/*
 * Look up the physical birth txg of a range, i.e. the txg in which the
 * range [offset, offset + asize) was copied.  The range must be
 * contiguously mapped (not a split block).
 *
 * Entries are ordered by increasing phys_birth and by increasing offset,
 * and each one covers the offsets from the end of its predecessor up to
 * its own vibe_offset.  Because an entry is only meaningful relative to
 * the one before it, a plain bsearch() comparator cannot be used; the
 * binary search below inspects the predecessor directly.  The matching
 * entry is the first one whose vibe_offset is greater than offset.
 *
 * The list must be non-empty and offset must lie below the last entry's
 * vibe_offset (both asserted).  If no entry matches, an assertion fires
 * and -1 (converted to uint64_t) is returned.
 */
uint64_t
vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
    uint64_t asize)
{
	vdev_indirect_birth_entry_phys_t *base;
	vdev_indirect_birth_entry_phys_t *last;

	ASSERT(vdev_indirect_births_verify(vib));
	ASSERT(vib->vib_phys->vib_count > 0);

	base = vib->vib_entries;
	last = base + vib->vib_phys->vib_count - 1;

	ASSERT3U(offset, <, last->vibe_offset);

	while (base <= last) {
		vdev_indirect_birth_entry_phys_t *p =
		    base + ((last - base) / 2);

		/* p ends at or before offset: the match is to the right. */
		if (p->vibe_offset <= offset) {
			base = p + 1;
			continue;
		}

		/*
		 * p ends past offset.  If it has a predecessor that also
		 * ends past offset, the match is further to the left.  The
		 * first-entry test keeps (p - 1) inside the array.
		 */
		if (p != vib->vib_entries &&
		    (p - 1)->vibe_offset > offset) {
			last = p - 1;
			continue;
		}

		/* p is the first entry ending past offset. */
		ASSERT3U(offset + asize, <=, p->vibe_offset);
		return (p->vibe_phys_birth_txg);
	}
	ASSERT(!"offset not found");
	return (-1);
}

/*
 * When _KERNEL is defined, mark every non-static function in this file
 * with EXPORT_SYMBOL.  The list is kept in alphabetical order.
 */
#if defined(_KERNEL)
EXPORT_SYMBOL(vdev_indirect_births_add_entry);
EXPORT_SYMBOL(vdev_indirect_births_alloc);
EXPORT_SYMBOL(vdev_indirect_births_close);
EXPORT_SYMBOL(vdev_indirect_births_count);
EXPORT_SYMBOL(vdev_indirect_births_free);
EXPORT_SYMBOL(vdev_indirect_births_last_entry_txg);
EXPORT_SYMBOL(vdev_indirect_births_object);
EXPORT_SYMBOL(vdev_indirect_births_open);
EXPORT_SYMBOL(vdev_indirect_births_physbirth);
#endif