Log message:
- Several small bug fixes
-- More correct method of leaving CPG (on suspend)
-- close log file descriptor once finished using it
-- fix problem with overlapping recoveries
-- clean-up postsuspend so remote requests do not get lost
-- missing 'break' statement causing seg fault
-- better error checking
--- cluster/cmirror/src/Attic/cluster.c 2007/11/19 18:00:20 1.1.2.7
+++ cluster/cmirror/src/Attic/cluster.c 2008/01/14 22:52:17 1.1.2.8
@@ -162,7 +162,7 @@
* Errors from previous functions are in the tfr struct.
*/
- LOG_DBG("Sending respose to %u on cluster: [%s/%llu]",
+ LOG_DBG("Sending response to %u on cluster: [%s/%llu]",
tfr->originator,
RQ_TYPE(tfr->request_type & ~DM_CLOG_RESPONSE),
tfr->seq);
@@ -704,6 +704,12 @@
if (!match->valid) {
LOG_DBG("Log not valid yet, storing request");
startup_tfr = queue_remove(free_queue);
+ if (!startup_tfr) {
+ LOG_ERROR("Supply of transfer structs exhausted");
+ r = -ENOMEM; /* FIXME: Better error #? */
+ goto out;
+ }
+
memcpy(startup_tfr, tfr, sizeof(*tfr) + tfr->data_size);
queue_add_tail(startup_tfr, match->startup_queue);
goto out;
@@ -724,7 +730,7 @@
struct cpg_address *left_list, int left_list_entries,
struct cpg_address *joined_list, int joined_list_entries)
{
- int i, j;
+ int i, j, fd;
int my_pid = getpid();
int found = 0;
struct clog_cpg *match, *tmp;
@@ -740,8 +746,8 @@
LOG_PRINT("* MEMBERS (%d):", member_list_entries);
for (i = 0; i < member_list_entries; i++)
- LOG_PRINT("* [%d] nodeid: %d, pid: %d",
- i, member_list[i].nodeid, member_list[i].pid);
+ LOG_PRINT("* nodeid: %d, pid: %d",
+ member_list[i].nodeid, member_list[i].pid);
LOG_PRINT("* LEAVING (%d):", left_list_entries);
for (i = 0; i < left_list_entries; i++)
@@ -768,6 +774,46 @@
goto out;
}
+ /* Am I leaving? */
+ for (i = 0; i < left_list_entries; i++)
+ if (my_cluster_id == left_list[i].nodeid) {
+ struct clog_tfr *tfr;
+
+ LOG_DBG("Finalizing leave...");
+ list_del_init(&match->list);
+
+ cpg_fd_get(match->handle, &fd);
+ links_unregister(fd);
+
+ cluster_postsuspend(match->name.value);
+
+ while (!queue_empty(cluster_queue)) {
+ tfr = queue_remove(cluster_queue);
+
+ /*
+ * A postsuspend is placed directly into
+ * the cluster_queue, without going out
+ * to the cluster. This means that only
+ * our postsuspend will ever exist in the
+ * cluster_queue.
+ */
+ if (tfr->request_type == DM_CLOG_POSTSUSPEND)
+ kernel_send(tfr);
+ else
+ queue_add(tfr, free_queue);
+ }
+
+ cpg_finalize(match->handle);
+
+ if (match->startup_queue->count)
+ LOG_ERROR("Startup items remain in cluster log");
+
+ free(match->startup_queue);
+ free(match);
+
+ goto out;
+ }
+
/* Am I the very first to join? */
if (!left_list_entries &&
(member_list_entries == 1) && (joined_list_entries == 1) &&
@@ -887,7 +933,7 @@
- if (!lc)
- return -EINVAL;
-
- destroy_cluster_cpg(tfr->uuid);
+ if (!lc) {
+ /* Is the log in the pending list? */
+ lc = get_pending_log(tfr->uuid);
+ if (!lc) {
+ LOG_ERROR("clog_dtr called on log that is not official or pending");
+ return -EINVAL;
+ }
+ } else {
+ LOG_DBG("[%s] clog_dtr: leaving CPG", SHORT_UUID(lc->uuid));
+ /*
+ * If postsuspend had done the destroy_cluster_cpg,
+ * the log context would be in the pending list
+ */
+ destroy_cluster_cpg(tfr->uuid);
+ }
LOG_PRINT("Cluster log removed (%s)", lc->uuid);
list_del_init(&lc->list);
+ if (lc->disk_fd != -1)
+ close(lc->disk_fd);
+ if (lc->disk_buffer)
+ free(lc->disk_buffer);
free(lc->clean_bits);
free(lc->sync_bits);
free(lc);
@@ -634,6 +663,7 @@
LOG_DBG("WARNING: log still marked as 'touched' during suspend");
*rtn = log_test_bit(lc->sync_bits, region);
if (*rtn)
- LOG_DBG(" Region is in-sync: %llu", region);
+ LOG_DBG("[%s] Region is in-sync: %llu",
+ SHORT_UUID(lc->uuid), region);
else
- LOG_DBG(" Region is not in-sync: %llu", region);
+ LOG_DBG("[%s] Region is not in-sync: %llu",
+ SHORT_UUID(lc->uuid), region);
tfr->data_size = sizeof(*rtn);
@@ -879,14 +941,6 @@
if (!lc)
return -EINVAL;
- /*
- * Are we trying to flush when a mark request conflicts
- * with a recovering region?
- */
- if ((lc->recovering_region != -1) &&
- !log_test_bit(lc->clean_bits, lc->recovering_region))
- return -EAGAIN;
-
/*
* Actual disk flush happens in 'commit_log()'
* Clear LOG_CHANGED and set LOG_FLUSH
@@ -992,8 +1046,8 @@
srsm_count_var = 0;
mark_list = rbt_search_plus(&lc->mark_tree, &region, srsm_count, &who);
if (!mark_list || !srsm_count_var) {
- LOG_DBG("Clear issued on region that is not marked: %llu/%u",
- region, who);
+ LOG_DBG("[%s] Clear issued by %u on region not marked: %llu",
+ SHORT_UUID(lc->uuid), who, region);
goto set_bit;
}
@@ -1082,14 +1136,14 @@
* FIXME: handle intermittent errors during recovery
* by resetting sync_search... but not to many times.
*/
- LOG_DBG(" Recovery has finished");
+ LOG_DBG("[%s] Recovery has finished", SHORT_UUID(lc->uuid));
pkg->i = 0;
return 0;
}
if (lc->recovering_region != (uint64_t)-1) {
- LOG_DBG("Someone is already recovering region %Lu",
- lc->recovering_region);
+ LOG_DBG("[%s] Someone is already recovering region %Lu",
+ SHORT_UUID(lc->uuid), lc->recovering_region);
pkg->i = 0;
return 0;
}
@@ -1104,14 +1158,17 @@
free(del);
if (!log_test_bit(lc->sync_bits, pkg->r)) {
- LOG_DBG("Assigning priority resync work to %u: %llu",
- tfr->originator, pkg->r);
+ LOG_DBG("[%s] Assigning priority resync work to %u: %llu",
+ SHORT_UUID(lc->uuid), tfr->originator, pkg->r);
#ifdef DEBUG
- LOG_DBG("Priority work remaining:");
+ LOG_DBG("[%s] Priority work remaining:",
+ SHORT_UUID(lc->uuid));
for (del = lc->recovery_request_list; del; del = del->next)
- LOG_DBG(" %llu", del->region);
+ LOG_DBG("[%s] %llu", SHORT_UUID(lc->uuid),
+ del->region);
#endif
pkg->i = 1;
+ lc->recovering_region = pkg->r;
return 0;
}
}
@@ -1127,8 +1184,10 @@
lc->sync_search = pkg->r + 1;
- LOG_DBG(" Assigning resync work: region = %llu\n", pkg->r);
+ LOG_DBG("[%s] Assigning resync work to %u: region = %llu\n",
+ SHORT_UUID(lc->uuid), tfr->originator, pkg->r);
pkg->i = 1;
+ lc->recovering_region = pkg->r;
return 0;
}
@@ -1153,10 +1212,16 @@
} else {
log_set_bit(lc, lc->sync_bits, pkg->region);
lc->sync_count++;
+ LOG_DBG("[%s] sync_count = %llu, Region %llu marked in-sync by %u",
+ SHORT_UUID(lc->uuid), lc->sync_count,
+ pkg->region, tfr->originator);
}
} else if (log_test_bit(lc->sync_bits, pkg->region)) {
lc->sync_count--;
log_clear_bit(lc, lc->sync_bits, pkg->region);
+ LOG_DBG("[%s] sync_count = %llu, Region %llu marked not in-sync by %u",
+ SHORT_UUID(lc->uuid), lc->sync_count,
+ pkg->region, tfr->originator);
}
r = kernel_recv_helper(*tfr, DM_CLOG_TFR_SIZE);
if (r) {
@@ -158,6 +158,7 @@
case DM_CLOG_DTR:
case DM_CLOG_STATUS_INFO:
case DM_CLOG_STATUS_TABLE:
+ case DM_CLOG_PRESUSPEND:
r = do_request(tfr);
if (r)
LOG_DBG("Returning failed request to kernel [%s]",
@@ -168,6 +169,19 @@
RQ_TYPE(tfr->request_type));
break;
+ case DM_CLOG_POSTSUSPEND:
+ r = do_request(tfr);
+ if (r) {
+ LOG_DBG("Returning failed request to kernel [%s]",
+ RQ_TYPE(tfr->request_type));
+ r = kernel_send(tfr);
+ if (r)
+ LOG_ERROR("Failed to respond to kernel [%s]",
+ RQ_TYPE(tfr->request_type));
+ }
+ queue_add_tail(tfr, cluster_queue);
+
+ break;
case DM_CLOG_RESUME:
/*
* Resume is a special case that requires a local
@@ -189,6 +203,9 @@
/* Add before send_to_cluster, so cluster code can find it */
queue_add_tail(tfr, cluster_queue);
r = cluster_send(tfr);
+ if (r)
+ LOG_ERROR("Unable to send request to cluster: %s",
+ strerror(-r));
break;
}
@@ -298,7 +315,7 @@
r = fcntl(cn_fd, F_SETFL, FNDELAY);
*/