1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
=== modified file 'mysql-test/suite/rpl/r/rpl_parallel.result'
--- mysql-test/suite/rpl/r/rpl_parallel.result 2014-11-13 09:31:20 +0000
+++ mysql-test/suite/rpl/r/rpl_parallel.result 2014-12-01 12:53:57 +0000
@@ -972,6 +972,54 @@
SET GLOBAL slave_parallel_threads=0;
SET GLOBAL slave_parallel_threads=10;
include/start_slave.inc
+*** MDEV-7237: Parallel replication: incorrect relaylog position after stop/start the slave ***
+INSERT INTO t2 VALUES (40);
+include/stop_slave.inc
+CHANGE MASTER TO master_use_gtid=no;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_scheduled_gtid_0_x_100";
+SET GLOBAL debug_dbug="+d,rpl_parallel_wait_for_done_trigger";
+SET GLOBAL slave_parallel_threads=0;
+SET GLOBAL slave_parallel_threads=10;
+INSERT INTO t2 VALUES (41);
+INSERT INTO t2 VALUES (42);
+DELETE FROM t2 WHERE a=40;
+INSERT INTO t2 VALUES (43);
+INSERT INTO t2 VALUES (44);
+FLUSH LOGS;
+INSERT INTO t2 VALUES (45);
+SET gtid_seq_no=100;
+INSERT INTO t2 VALUES (46);
+BEGIN;
+SELECT * FROM t2 WHERE a=40 FOR UPDATE;
+a
+40
+include/start_slave.inc
+SET debug_sync= 'now WAIT_FOR scheduled_gtid_0_x_100';
+STOP SLAVE;
+SET debug_sync= 'now WAIT_FOR wait_for_done_waiting';
+ROLLBACK;
+include/wait_for_slave_sql_to_stop.inc
+SELECT * FROM t2 WHERE a >= 40 ORDER BY a;
+a
+41
+42
+include/start_slave.inc
+SELECT * FROM t2 WHERE a >= 40 ORDER BY a;
+a
+41
+42
+43
+44
+45
+46
+include/stop_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+SET DEBUG_SYNC= 'RESET';
+SET GLOBAL slave_parallel_threads=0;
+SET GLOBAL slave_parallel_threads=10;
+CHANGE MASTER TO master_use_gtid=slave_pos;
+include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
=== modified file 'mysql-test/suite/rpl/t/rpl_parallel.test'
--- mysql-test/suite/rpl/t/rpl_parallel.test 2014-11-13 09:31:20 +0000
+++ mysql-test/suite/rpl/t/rpl_parallel.test 2014-12-01 12:53:57 +0000
@@ -1535,6 +1535,99 @@
--source include/start_slave.inc
+--echo *** MDEV-7237: Parallel replication: incorrect relaylog position after stop/start the slave ***
+--connection server_1
+INSERT INTO t2 VALUES (40);
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+--source include/stop_slave.inc
+CHANGE MASTER TO master_use_gtid=no;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+# This DBUG injection causes a DEBUG_SYNC signal "scheduled_gtid_0_x_100" when
+# GTID 0-1-100 has been scheduled for and fetched by a worker thread.
+SET GLOBAL debug_dbug="+d,rpl_parallel_scheduled_gtid_0_x_100";
+# This DBUG injection causes a DEBUG_SYNC signal "wait_for_done_waiting" when
+# STOP SLAVE has signalled all worker threads to stop.
+SET GLOBAL debug_dbug="+d,rpl_parallel_wait_for_done_trigger";
+# Reset worker threads to make DBUG setting catch on.
+SET GLOBAL slave_parallel_threads=0;
+SET GLOBAL slave_parallel_threads=10;
+
+
+--connection server_1
+# Setup some transaction for the slave to replicate.
+INSERT INTO t2 VALUES (41);
+INSERT INTO t2 VALUES (42);
+DELETE FROM t2 WHERE a=40;
+INSERT INTO t2 VALUES (43);
+INSERT INTO t2 VALUES (44);
+# Force the slave to switch to a new relay log file.
+FLUSH LOGS;
+INSERT INTO t2 VALUES (45);
+# Inject a GTID 0-1-100, which will trigger a DEBUG_SYNC signal when this
+# transaction has been fetched by a worker thread.
+SET gtid_seq_no=100;
+INSERT INTO t2 VALUES (46);
+--save_master_pos
+
+--connection con_temp2
+# Temporarily block the DELETE on a=40 from completing.
+BEGIN;
+SELECT * FROM t2 WHERE a=40 FOR UPDATE;
+
+
+--connection server_2
+--source include/start_slave.inc
+
+# The DBUG injection set above will make the worker thread signal the following
+# debug_sync when the GTID 0-1-100 has been reached by a worker thread.
+# Thus, at this point, the SQL driver thread has reached the next
+# relay log file name, while a worker thread is still processing a
+# transaction in the previous relay log file, blocked on the SELECT FOR
+# UPDATE.
+SET debug_sync= 'now WAIT_FOR scheduled_gtid_0_x_100';
+# At this point, the SQL driver thread is in the new relay log file, while
+# the DELETE from the old relay log file is not yet complete. We will stop
+# the slave at this point. The bug was that the DELETE statement would
+# update the slave position to the _new_ relay log file name instead of
+# its own old file name. Thus, by stoping and restarting the slave at this
+# point, we would get an error at restart due to incorrect position. (If
+# we would let the slave catch up before stopping, the incorrect position
+# would be corrected by a later transaction).
+
+send STOP SLAVE;
+
+--connection con_temp2
+# Wait for STOP SLAVE to have proceeded sufficiently that it has signalled
+# all worker threads to stop; this ensures that we will stop after the DELETE
+# transaction (and not after a later transaction that might have been able
+# to set a fixed position).
+SET debug_sync= 'now WAIT_FOR wait_for_done_waiting';
+# Now release the row lock that was blocking the replication of DELETE.
+ROLLBACK;
+
+--connection server_2
+reap;
+--source include/wait_for_slave_sql_to_stop.inc
+SELECT * FROM t2 WHERE a >= 40 ORDER BY a;
+# Now restart the slave. With the bug present, this would start at an
+# incorrect relay log position, causing relay log read error (or if unlucky,
+# silently skip a number of events).
+--source include/start_slave.inc
+--sync_with_master
+SELECT * FROM t2 WHERE a >= 40 ORDER BY a;
+--source include/stop_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+SET DEBUG_SYNC= 'RESET';
+SET GLOBAL slave_parallel_threads=0;
+SET GLOBAL slave_parallel_threads=10;
+CHANGE MASTER TO master_use_gtid=slave_pos;
+--source include/start_slave.inc
+
+
+# Clean up.
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
=== modified file 'sql/rpl_parallel.cc'
--- sql/rpl_parallel.cc 2014-11-17 11:42:02 +0000
+++ sql/rpl_parallel.cc 2014-12-01 12:53:57 +0000
@@ -631,6 +631,14 @@
PSI_stage_info old_stage;
uint64 wait_count;
+ DBUG_EXECUTE_IF("rpl_parallel_scheduled_gtid_0_x_100", {
+ if (rgi->current_gtid.domain_id == 0 &&
+ rgi->current_gtid.seq_no == 100) {
+ debug_sync_set_action(thd,
+ STRING_WITH_LEN("now SIGNAL scheduled_gtid_0_x_100"));
+ }
+ });
+
in_event_group= true;
/*
If the standalone flag is set, then this event group consists of a
@@ -1131,9 +1131,9 @@
inuse_relaylog *ir= accumulated_ir_last;
if (ir)
{
- my_atomic_rwlock_wrlock(&ir->rli->inuse_relaylog_atomic_lock);
+ my_atomic_rwlock_wrlock(&ir->inuse_relaylog_atomic_lock);
my_atomic_add64(&ir->dequeued_count, accumulated_ir_count);
- my_atomic_rwlock_wrunlock(&ir->rli->inuse_relaylog_atomic_lock);
+ my_atomic_rwlock_wrunlock(&ir->inuse_relaylog_atomic_lock);
accumulated_ir_count= 0;
accumulated_ir_last= NULL;
}
=== modified file 'sql/rpl_rli.cc'
--- sql/rpl_rli.cc 2014-11-25 11:19:48 +0000
+++ sql/rpl_rli.cc 2014-12-01 12:53:57 +0000
@@ -986,11 +986,11 @@
if (rgi->is_parallel_exec)
{
/* In case of parallel replication, do not update the position backwards. */
- int cmp= strcmp(group_relay_log_name, event_relay_log_name);
+ int cmp= strcmp(group_relay_log_name, rgi->event_relay_log_name);
if (cmp < 0)
{
group_relay_log_pos= rgi->future_event_relay_log_pos;
- strmake_buf(group_relay_log_name, event_relay_log_name);
+ strmake_buf(group_relay_log_name, rgi->event_relay_log_name);
notify_group_relay_log_name_update();
} else if (cmp == 0 && group_relay_log_pos < rgi->future_event_relay_log_pos)
group_relay_log_pos= rgi->future_event_relay_log_pos;
|