Inclusion-Exclusion principle

When sets overlap, the Sum rule overcounts because shared elements are added more than once. The Inclusion-Exclusion principle corrects this by alternating between adding and subtracting the cardinalities of progressively deeper intersections: add singles, subtract pairs, add triples, and so on.

For two sets:

$$|A \cup B| = |A| + |B| - |A \cap B|$$

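For three sets:

$$|A \cup B \cup C| = |A| + |B| + |C| - |A \cap B| - |A \cap C| - |B \cap C| + |A \cap B \cap C|$$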

Here’s an implementation that generalises to any number of sets.

# 1. Setup
import itertools as it
 
# Patient cohorts: one set of patient IDs per condition.
# A patient may appear in more than one cohort.
cohort_cardiac = {
    "P001", "P003", "P007", "P012",
    "P015", "P018", "P022",
}
cohort_respiratory = {
    "P002", "P003", "P005",
    "P009", "P012", "P019",
}
cohort_metabolic = {
    "P004", "P007", "P010",
    "P015", "P020",
}

# Bundle the cohorts into a tuple so they can be passed around as one value.
cohorts = (
    cohort_cardiac,
    cohort_respiratory,
    cohort_metabolic,
)
 
# 2. Define functions
 
# intersection of variable length tuples of sets
def intersect(tup):
    """Return the intersection of every set in *tup*.

    Args:
        tup: A non-empty iterable (typically a tuple) of sets.

    Returns:
        A new set holding the elements common to all members of *tup*.
        The result is always a fresh object, even when *tup* contains a
        single set, so mutating it never affects the inputs.

    Raises:
        IndexError: If *tup* is empty; the intersection of zero sets is
            not defined here.
    """
    sets = list(tup)
    if not sets:
        raise IndexError("intersect() requires at least one set")
    # Use the last set as the receiver so the result type follows the same
    # operand as the original right-to-left `&` chain did.
    *others, last = sets
    return last.intersection(*others)
 
def include_exclude(cohorts):
    """Return the cardinality of the union of *cohorts* by inclusion-exclusion.

    For every group size k from 1 up to the number of cohorts, the sizes of
    the intersections of all k-sized combinations are summed; that sum is
    added when k is odd and subtracted when k is even.

    Args:
        cohorts: An iterable of sets. One-shot iterables such as generators
            are accepted because the input is materialised up front.

    Returns:
        The number of distinct elements across all cohorts as an int;
        0 when *cohorts* is empty.
    """
    # Materialise once: the collection is sized and re-combined for every k.
    cohorts = tuple(cohorts)
    total = 0
    for size in range(1, len(cohorts) + 1):
        # Odd-sized intersections are included, even-sized ones excluded.
        sign = 1 if size % 2 == 1 else -1
        total += sign * sum(
            len(intersect(combo)) for combo in it.combinations(cohorts, size)
        )
    return total
    
# 3. Apply
 
# Compute the size of the union of the cohorts with include_exclude() and
# print it. With the three cohorts defined above the reported value is 14
# (7 + 6 + 5 singles, minus 2 + 2 + 0 pairwise overlaps, plus 0 triple overlap).
print(f"Cardinality of union: {include_exclude(cohorts)}")