Browse Source

ims_charging: fixed deadlock when interim CCA timeout occurs

Carlos Ruiz Diaz 12 years ago
parent
commit
fc4f2216f8

+ 2 - 0
modules/ims_charging/dialog.c

@@ -39,7 +39,9 @@ void dlg_reply(struct dlg_cell *dlg, int type, struct dlg_cb_params *_params) {
 			LM_ERR("Ro Session object is NULL...... aborting\n");
 			return;
 		}
+
 		ro_session_entry = &(ro_session_table->entries[session->h_entry]);
+
 		ro_session_lock(ro_session_table, ro_session_entry);
 
 		if (session->active) {

+ 11 - 1
modules/ims_charging/ims_ro.c

@@ -642,6 +642,16 @@ error:
     	cdpb.AAASessionsUnlock(auth->hash);
     	cdpb.AAADropCCAccSession(auth);
     }
+
+    shm_free(i_req);
+    //
+    // since callback function will be never called because of the error, we need to release the lock on the session
+    // to it can be reused later.
+    //
+    struct ro_session_entry *ro_session_entry = &(ro_session_table->entries[ro_session->h_entry]);
+    unref_ro_session_unsafe(ro_session, 1, ro_session_entry);//unref from the initial timer that fired this event.
+    ro_session_unlock(ro_session_table, ro_session_entry);
+
     return;
 }
 
@@ -1029,7 +1039,7 @@ static void resume_on_initial_ccr(int is_timeout, void *param, AAAMessage *cca,
     if (is_timeout) {
         update_stat(ccr_timeouts, 1);
         LM_ERR("Transaction timeout - did not get CCA\n");
-	error_code =  RO_RETURN_ERROR;
+        error_code =  RO_RETURN_ERROR;
         goto error0;
     }
 

+ 2 - 2
modules/ims_charging/ro_session_hash.h

@@ -78,7 +78,7 @@ extern struct ro_session_table *ro_session_table;
  * \param _entry locked entry
  */
 #define ro_session_lock(_table, _entry) \
-		lock_set_get( (_table)->locks, (_entry)->lock_idx);
+		{ LM_DBG("LOCKING %d", (_entry)->lock_idx); lock_set_get( (_table)->locks, (_entry)->lock_idx); LM_DBG("LOCKED %d", (_entry)->lock_idx);}
 
 
 /*!
@@ -87,7 +87,7 @@ extern struct ro_session_table *ro_session_table;
  * \param _entry locked entry
  */
 #define ro_session_unlock(_table, _entry) \
-		lock_set_release( (_table)->locks, (_entry)->lock_idx);
+		{ LM_DBG("UNLOCKING %d", (_entry)->lock_idx); lock_set_release( (_table)->locks, (_entry)->lock_idx); LM_DBG("UNLOCKED %d", (_entry)->lock_idx); }
 
 /*!
  * \brief Reference an ro_session without locking

+ 30 - 17
modules/ims_charging/ro_timer.c

@@ -256,19 +256,22 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
 	time_t now = time(0);
 	time_t used_secs;
 	struct ro_session_entry *ro_session_entry = NULL;
+	int call_terminated = 0;
 
 	if (!i_req) {
 		LM_ERR("This is so wrong: i_req is NULL\n");
 		return;
 	}
 
+	ro_session_entry = &(ro_session_table->entries[i_req->ro_session->h_entry]);
+
 	LM_DBG("credit=%d credit_valid_for=%d", i_req->new_credit, i_req->credit_valid_for);
 
 	used_secs = now - i_req->ro_session->last_event_timestamp;
 
 	/* check to make sure diameter server is giving us sane values */
 	if (i_req->new_credit > i_req->credit_valid_for) {
-		LM_WARN("That's weird, Diameter server gave us credit with a lower validity period :D. Setting reserved time to validity perioud instead \n");
+		LM_WARN("That's weird, Diameter server gave us credit with a lower validity period :D. Setting reserved time to validity period instead \n");
 		i_req->new_credit = i_req->credit_valid_for;
 	}
 
@@ -321,8 +324,21 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
 		i_req->ro_session->event_type = no_more_credit;
 		int whatsleft = i_req->ro_session->reserved_secs - used_secs;
 		if (whatsleft <= 0) {
-			LM_WARN("Immediately killing call due to no more credit\n");
+			// TODO we need to handle this situation more precisely.
+			// in case CCR times out, we get a call shutdown but the error message assumes it was due to a lack of credit.
+			//
+			LM_WARN("Immediately killing call due to no more credit *OR* no CCA received (timeout) after reservation request\n");
+
+			//
+			// we need to unlock the session or else we might get a deadlock on dlg_terminated() dialog callback.
+			// Do not unref the session because it will be made inside the dlg_terminated() function.
+			//
+
+			//unref_ro_session_unsafe(i_req->ro_session, 1, ro_session_entry);
+			ro_session_unlock(ro_session_table, ro_session_entry);
+
 			dlgb.lookup_terminate_dlg(i_req->ro_session->dlg_h_entry, i_req->ro_session->dlg_h_id, NULL );
+			call_terminated = 1;
 		}
 		else {
 			LM_DBG("No more credit for user - letting call run out of money in [%i] seconds", whatsleft);
@@ -337,10 +353,13 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
 		}
 	}
 
-	ro_session_entry = &(ro_session_table->entries[i_req->ro_session->h_entry]);
-
-	unref_ro_session_unsafe(i_req->ro_session, 1, ro_session_entry);//unref from the initial timer that fired this event.
-	ro_session_unlock(ro_session_table, ro_session_entry);
+	//
+	// if call was forcefully terminated, the lock was released before dlgb.lookup_terminate_dlg() function call.
+	//
+	if (!call_terminated) {
+		unref_ro_session_unsafe(i_req->ro_session, 1, ro_session_entry);//unref from the initial timer that fired this event.
+		ro_session_unlock(ro_session_table, ro_session_entry);
+	}
 
 	shm_free(i_req);
 	LM_DBG("Exiting async ccr interim nicely");
@@ -350,26 +369,21 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
  * If we cant we need to put a new timer to kill the call at the appropriate time
  */
 void ro_session_ontimeout(struct ro_tl *tl) {
-	time_t now,  used_secs, call_time;
+	time_t now, used_secs, call_time;
 
 	LM_DBG("We have a fired timer [p=%p] and tl=[%i].\n", tl, tl->timeout);
 
 	/* find the session id for this timer*/
 	struct ro_session_entry *ro_session_entry = NULL;
-
-	struct ro_session* ro_session;
-	ro_session = ((struct ro_session*) ((char *) (tl)
-			- (unsigned long) (&((struct ro_session*) 0)->ro_tl)));
+	struct ro_session* ro_session = ((struct ro_session*) ((char *) (tl) - (unsigned long) (&((struct ro_session*) 0)->ro_tl)));
 
 	if (!ro_session) {
 		LM_ERR("Can't find a session. This is bad");
 		return;
 	}
 
-//	LM_ALERT("LOCKING... ");	
 	ro_session_entry = &(ro_session_table->entries[ro_session->h_entry]);
 	ro_session_lock(ro_session_table, ro_session_entry);
-//	LM_ALERT("LOCKED!");
 	
 	LM_DBG("event-type=%d", ro_session->event_type);
 	
@@ -428,7 +442,6 @@ void ro_session_ontimeout(struct ro_tl *tl) {
 				ref_ro_session_unsafe(ro_session, 1);
 			}
 			LM_ERR("Immediately killing call due to unknown error\n");
-			dlgb.lookup_terminate_dlg(ro_session->dlg_h_entry, ro_session->dlg_h_id, NULL );
 		}
 
 		break;
@@ -439,15 +452,15 @@ void ro_session_ontimeout(struct ro_tl *tl) {
 			LM_INFO("Call/session must be ended - no more funds.\n");
 		else if (ro_session->event_type == unknown_error)
 			LM_ERR("last event caused an error. We will now tear down this session.\n");
-
-		dlgb.lookup_terminate_dlg(ro_session->dlg_h_entry, ro_session->dlg_h_id, NULL );
 	}
 
+
 	update_stat(killed_calls, 1);
 
-	unref_ro_session_unsafe(ro_session, 1, ro_session_entry); //unref from the initial timer that fired this event.
+	//unref_ro_session_unsafe(ro_session, 1, ro_session_entry); //unref from the initial timer that fired this event.
 	ro_session_unlock(ro_session_table, ro_session_entry);
 
+	dlgb.lookup_terminate_dlg(ro_session->dlg_h_entry, ro_session->dlg_h_id, NULL);
 	return;
 }