فهرست منبع

p_usrloc: reworking mdb_availability_control

- simplify the general implementation
- avoid a deadlock caused by a process trying to acquire the same lock twice

(cherry picked from commit 5789c506d022dd5713072cd1fcc07a25f42e098c)
Lucian Balaceanu 5 سال پیش
والد
کامیت
ffb0576bd7

+ 10 - 13
src/modules/p_usrloc/p_usrloc_mod.c

@@ -185,7 +185,7 @@ str default_db_type   = str_init(DEFAULT_DB_TYPE);
 str domain_db         = str_init(DEFAULT_DOMAIN_DB);
 int default_dbt       = 0;
 int expire            = 0;
-db_shared_param_t *write_on_master_db_shared;
+int *mdb_w_available;
 
 /*! \brief
  * Exported functions
@@ -311,13 +311,6 @@ static int mod_init(void)
 	}
 #endif
 
-	if((write_on_master_db_shared = shm_malloc(sizeof(db_shared_param_t))) == NULL) {
-		LM_ERR("couldn't allocate shared memory.\n");
-		return -1;
-	} else {
-		write_on_master_db_shared->val = db_master_write;
-	}
-
 	if(ul_hash_size<=1)
 		ul_hash_size = 512;
 	else
@@ -406,14 +399,18 @@ static int mod_init(void)
 		LM_ERR("could not init database watch environment.\n");
 		return -1;
 	}
-	if (lock_init(&write_on_master_db_shared->lock)==0){
-		LM_ERR("could not initialise lock\n");
+
+	if((mdb_w_available = shm_malloc(sizeof(int))) == NULL) {
+		LM_ERR("couldn't allocate shared memory. \n");
+		return -1;
 	}
-	if(write_on_master_db_shared->val){
+	if (db_master_write) {
 		/* register extra dummy timer to be created in init_db_check() */
 		register_dummy_timers(1);
+		if (mdb_availability_control) {
+			check_master_db();
+		}
 	}
-        check_master_db(db_master_write);
 	return 0;
 }
 
@@ -422,7 +419,7 @@ static int child_init(int _rank)
 {
 	if(_rank==PROC_INIT) {
 		if(init_db_check() < 0){
-				LM_ERR("could not initialise database check.\n");
+			LM_ERR("could not initialise database check.\n");
 			return -1;
 		}
 		return 0;

+ 1 - 5
src/modules/p_usrloc/p_usrloc_mod.h

@@ -123,11 +123,7 @@ extern int connection_expires;
 extern int alg_location;
 
 extern int  max_loc_nr;
-typedef struct db_shared_param {
-	int val;
-	gen_lock_t lock;
-} db_shared_param_t;
-extern db_shared_param_t *write_on_master_db_shared;
+extern int * mdb_w_available;
 extern int mdb_availability_control;
 
 #endif /* UL_MOD_H */

+ 42 - 28
src/modules/p_usrloc/ul_db.c

@@ -60,7 +60,7 @@ int ul_db_init(void) {
 	
 	memset(results, 0, sizeof(results));
 
-	if(write_on_master_db_shared->val){
+	if(db_master_write){
 		if(db_bind_mod(mdb.write.url, &mdb.write.dbf) < 0) {
 			LM_ERR("could not bind api for write db.\n");
 			return -1;
@@ -102,16 +102,19 @@ int ul_db_child_init(void) {
 	if(ul_db_child_locnr_init() == -1) return -1;
 	
 	LM_INFO("location number is %d\n", max_loc_nr);
-        lock_get(&write_on_master_db_shared->lock);
-	if(write_on_master_db_shared->val){
+	if(db_master_write){
 		if((mdb.write.dbh  = mdb.write.dbf.init(mdb.write.url)) == NULL) {
-			LM_ERR("could not connect to sip master db (write).\n");
-			lock_release(&write_on_master_db_shared->lock);
-			return -1;
+			if (mdb_availability_control) {
+				LM_INFO("starting with no connection to sip master db write\n");
+				return 0;
+			}
+			else {
+				LM_ERR("could not connect to sip master db (write).\n");
+				return -1;
+			}
 		}
 		LM_INFO("write db connection for children initialized\n");
 	}
-	lock_release(&write_on_master_db_shared->lock);
 	return 0;
 }
 
@@ -138,6 +141,21 @@ void ul_db_shutdown(void) {
 	return;
 }
 
+int init_w_dbh(ul_master_db_t *write) {
+	if (mdb_availability_control) {
+		if (!(*mdb_w_available)) {
+			return -1;
+		}
+		if (write->dbh == NULL) {
+			if((write->dbh  = write->dbf.init(write->url)) == NULL) {
+				LM_ERR("Could not recreate connection to master write db.\n");
+				return -1;
+			}
+			LM_INFO("Recreated connection to master write db.\n");
+		}
+	}
+	return 0;
+}
 
 int db_handle_error(ul_db_handle_t * handle, int no) {
 	int query_len;
@@ -150,12 +168,9 @@ int db_handle_error(ul_db_handle_t * handle, int no) {
 		return -1;
 	}
 
-	lock_get(&write_on_master_db_shared->lock);
-	if(!write_on_master_db_shared->val){
-		lock_release(&write_on_master_db_shared->lock);
+	if (!db_master_write) {
 		return 0;
 	}
-	lock_release(&write_on_master_db_shared->lock);
 
 	query_len = 35 + reg_table.len
 			+ error_col.len * 2 + id_col.len;
@@ -186,7 +201,10 @@ int db_handle_error(ul_db_handle_t * handle, int no) {
 		tmp.s = query;
 		tmp.len = strlen(query);
 
-		if (mdb.write.dbf.raw_query (mdb.write.dbh, &tmp, NULL)) {
+		if (init_w_dbh(&mdb.write) < 0)
+			return -1;
+
+		if (mdb.write.dbf.raw_query(mdb.write.dbh, &tmp, NULL)) {
 			LM_ERR("error in database update.\n");
 			return -1;
 		}
@@ -209,6 +227,8 @@ int db_handle_error(ul_db_handle_t * handle, int no) {
 		handle->id, db->no, db->errors, cfg_get(p_usrloc, p_usrloc_cfg, db_err_threshold));
 	if(db->errors >= cfg_get(p_usrloc, p_usrloc_cfg, db_err_threshold)) {
 		LM_DBG("db_handle_error: now doing failover\n");
+		if (init_w_dbh(&mdb.write) < 0)
+			return -1;
 		if((db_failover(&mdb.write.dbf, mdb.write.dbh, handle, no)) < 0) {
 			LM_ERR("error in doing failover.\n");
 			return -1;
@@ -378,12 +398,9 @@ int ul_db_query(str * table, str * first, str * second, db1_con_t *** _r_h,
 		LM_ERR("could not retrieve db handle.\n");
 		return -1;
 	}
-	lock_get(&write_on_master_db_shared->lock);
-	if((ret = db_query(handle, _r_h, &f, table, _k, _op, _v, _c, _n, _nc, _o, _r, write_on_master_db_shared->val)) < 0){
-		lock_release(&write_on_master_db_shared->lock);
+	if((ret = db_query(handle, _r_h, &f, table, _k, _op, _v, _c, _n, _nc, _o, _r, db_master_write)) < 0){
 		return ret;
 	}
-	lock_release(&write_on_master_db_shared->lock);
 	add_dbf(*_r, f);
 	return ret;
 }
@@ -401,34 +418,31 @@ int ul_db_free_result(db1_con_t ** dbh, db1_res_t * res){
 }
 
 int db_reactivate(ul_db_handle_t * handle, int no){
-	lock_get(&write_on_master_db_shared->lock);
-	if(!write_on_master_db_shared->val){
-		lock_release(&write_on_master_db_shared->lock);
+	if(!db_master_write){
 		LM_ERR("running in read only mode, abort.\n");
 		return -1;
 	}
-	lock_release(&write_on_master_db_shared->lock);
+	if (init_w_dbh(&mdb.write) < 0)
+		return -1;
 	return db_failover_reactivate(&mdb.write.dbf, mdb.write.dbh, handle, no);
 }
 
 int db_reset_failover_time(ul_db_handle_t * handle, int no){
-	lock_get(&write_on_master_db_shared->lock);
-	if(!write_on_master_db_shared->val){
-		lock_release(&write_on_master_db_shared->lock);
+	if(!db_master_write){
 		LM_ERR("running in read only mode, abort.\n");
 		return -1;
 	}
-	lock_release(&write_on_master_db_shared->lock);
+	if (init_w_dbh(&mdb.write) < 0)
+		return -1;
 	return db_failover_reset(&mdb.write.dbf, mdb.write.dbh, handle->id, no);
 }
 
 int ul_db_check(ul_db_handle_t * handle){
-	lock_get(&write_on_master_db_shared->lock);
-	if(write_on_master_db_shared->val){
-		lock_release(&write_on_master_db_shared->lock);
+	if(db_master_write){
+		if (init_w_dbh(&mdb.write) < 0)
+			return -1;
 		return check_handle(&mdb.write.dbf, mdb.write.dbh, handle);
 	} else {
-		lock_release(&write_on_master_db_shared->lock);
 		LM_ERR("checking is useless in read-only mode\n");
 		return 0;
 	}

+ 7 - 8
src/modules/p_usrloc/ul_db_watch.c

@@ -97,8 +97,8 @@ void check_dbs(unsigned int ticks, void *param){
 	int found;
 	int i;
 
-	if(mdb_availability_control) {
-		check_master_db(db_master_write);
+	if (db_master_write && mdb_availability_control) {
+		check_master_db();
 	}
 	if(!list_lock){
 		return;
@@ -152,20 +152,19 @@ void check_dbs(unsigned int ticks, void *param){
 	lock_release(list_lock);
 }
 
-void check_master_db(int dbm_write_default) {
+void check_master_db() {
 	if(mdb.write.dbh){
 		mdb.write.dbf.close(mdb.write.dbh);
 		mdb.write.dbh = NULL;
 	}
 
-	lock_get(&write_on_master_db_shared->lock);
 	if((mdb.write.dbh  = mdb.write.dbf.init(mdb.write.url)) == NULL) {
-		write_on_master_db_shared->val = 0;
-		LM_WARN("Master db is unavailable.\n");
+		LM_INFO("Master db is unavailable.\n");
+		*mdb_w_available = 0;
 	} else {
-		write_on_master_db_shared->val = dbm_write_default;
+		LM_INFO("Master db is available.\n");
+		*mdb_w_available = 1;
 	}
-	lock_release(&write_on_master_db_shared->lock);
 }
 
 int ul_register_watch_db(int id){

+ 1 - 1
src/modules/p_usrloc/ul_db_watch.h

@@ -35,6 +35,6 @@ int ul_register_watch_db(int id);
 
 int ul_unregister_watch_db(int id);
 
-void check_master_db(int dbm_write_default);
+void check_master_db();
 
 #endif