Classroom Glossary Public page

Lab 4: Physical-Layer Authentication -- RF Fingerprinting

296 words

Week: 5 -- RF Security Primitives
Points: 15
Time estimate: 60 min lab + 2 hr independent
Deliverable: lab-4-report.md + Python classifier


Objectives

  1. Capture transmissions from two same-make SDR units (or two channels of one ANT-SDR E200); extract CFO and I/Q imbalance as fingerprinting features.
  2. Build a simple distance-based classifier that distinguishes the two units from their RF fingerprint.
  3. Evaluate classification accuracy at multiple SNR levels.

Setup

Option A (two hardware units): Two ANT-SDR E200 units, or one HackRF transmitting and one ANT-SDR E200 receiving. Transmit a known pilot signal from each unit in turn; record the captured IQ from the receiver.

Option B (simulated): Use the I/Q imbalance model from Week 4 lecture to simulate two transmitters with different hardware parameters. This option is valid if only one SDR is available.


Part A: Feature Extraction (25 min)

#!/usr/bin/env python3
"""Lab 4 Part A: RF fingerprinting feature extraction."""
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import correlate

# ---- Pilot-based CFO estimation ----
def estimate_cfo(samples, pilot, fs):
    """Estimate carrier frequency offset from pilot correlation."""
    corr = correlate(samples[:4*len(pilot)], pilot, mode='valid')
    peak_idx = np.argmax(np.abs(corr))
    
    if peak_idx + len(pilot) < len(samples):
        seg1 = samples[peak_idx:peak_idx+len(pilot)]
        seg2 = samples[peak_idx+len(pilot):peak_idx+2*len(pilot)]
        if len(seg2) == len(pilot):
            corr1 = np.sum(seg1 * np.conj(pilot))
            corr2 = np.sum(seg2 * np.conj(pilot))
            phase_delta = np.angle(corr2 * np.conj(corr1))
            return phase_delta * fs / (2 * np.pi * len(pilot))
    return np.angle(corr[np.argmax(np.abs(corr))]) * fs / (2 * np.pi * len(pilot))

# ---- I/Q imbalance estimation ----
def estimate_iq_imbalance(x):
    """Estimate gain and phase imbalance from signal statistics."""
    I = x.real
    Q = x.imag
    E_II = np.mean(I**2)
    E_QQ = np.mean(Q**2)
    E_IQ = np.mean(I*Q)
    gain_db = 20*np.log10(np.sqrt(E_II/(E_QQ + 1e-12)))
    phase_deg = np.degrees(np.arcsin(np.clip(2*E_IQ/(E_II+E_QQ), -1, 1)))
    return gain_db, phase_deg

# ---- Simulate two SDR transmitters ----
np.random.seed(42)
fs = 1e6
N_samples = 10000

# Pilot: Zadoff-Chu sequence (same as LTE preamble -- good autocorrelation)
def zadoff_chu(N, u=1):
    """Zadoff-Chu sequence of length N with root u."""
    n = np.arange(N)
    if N % 2 == 0:
        return np.exp(-1j * np.pi * u * n * (n) / N)
    else:
        return np.exp(-1j * np.pi * u * n * (n+1) / N)

pilot = zadoff_chu(64)  # 64-sample pilot

# Transmitter A: CFO=+1.2 kHz, gain_imb=0.4 dB, phase_imb=1.8°
# Transmitter B: CFO=-0.7 kHz, gain_imb=-0.2 dB, phase_imb=3.1°
tx_params = {
    'A': {'cfo_hz': 1200.0, 'gain_db': 0.4, 'phase_deg': 1.8},
    'B': {'cfo_hz': -700.0, 'gain_db': -0.2, 'phase_deg': 3.1},
}

def generate_tx(cfo_hz, gain_db, phase_deg, N, fs, pilot, snr_db):
    """Generate received signal from a transmitter with given hardware params."""
    # Repeat pilot N times with random data in between
    signal = np.tile(pilot, N // len(pilot) + 1)[:N]
    
    # Apply CFO
    t = np.arange(N) / fs
    signal = signal * np.exp(2j * np.pi * cfo_hz * t)
    
    # Apply I/Q imbalance
    A = 10**(gain_db/20)
    phi = np.deg2rad(phase_deg)
    I = signal.real
    Q = signal.imag
    signal = I + 1j * (A * np.cos(phi) * Q + A * np.sin(phi) * I)
    
    # Add noise
    snr_linear = 10**(snr_db/10)
    signal_power = np.mean(np.abs(signal)**2)
    noise_std = np.sqrt(signal_power / (2 * snr_linear))
    noise = noise_std * (np.random.randn(N) + 1j*np.random.randn(N))
    
    return signal + noise

# Extract features at SNR = 20 dB (high SNR baseline)
snr_db = 20.0
n_captures = 50  # 50 captures per transmitter

features = {'A': [], 'B': []}
for tx_id, params in tx_params.items():
    for _ in range(n_captures):
        rx = generate_tx(
            params['cfo_hz'], params['gain_db'], params['phase_deg'],
            N_samples, fs, pilot, snr_db
        )
        cfo_est = estimate_cfo(rx, pilot, fs)
        g_est, p_est = estimate_iq_imbalance(rx)
        features[tx_id].append([cfo_est, g_est, p_est])

features = {k: np.array(v) for k, v in features.items()}

print("Feature statistics (mean ± std) at SNR=20 dB:")
for tx_id in ['A', 'B']:
    f = features[tx_id]
    print(f"\n  TX {tx_id}:")
    print(f"    CFO:         {np.mean(f[:,0]):8.1f} ± {np.std(f[:,0]):.1f} Hz")
    print(f"    Gain imb:    {np.mean(f[:,1]):8.3f} ± {np.std(f[:,1]):.3f} dB")
    print(f"    Phase imb:   {np.mean(f[:,2]):8.3f} ± {np.std(f[:,2]):.3f}°")

# Feature space visualization
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.scatter(features['A'][:,0], features['A'][:,1], label='TX A', alpha=0.6, s=20)
plt.scatter(features['B'][:,0], features['B'][:,1], label='TX B', alpha=0.6, s=20)
plt.xlabel('CFO (Hz)')
plt.ylabel('Gain imbalance (dB)')
plt.title('Feature Space: CFO vs Gain Imbalance')
plt.legend()

plt.subplot(1, 3, 2)
plt.scatter(features['A'][:,0], features['A'][:,2], label='TX A', alpha=0.6, s=20)
plt.scatter(features['B'][:,0], features['B'][:,2], label='TX B', alpha=0.6, s=20)
plt.xlabel('CFO (Hz)')
plt.ylabel('Phase imbalance (°)')
plt.title('Feature Space: CFO vs Phase Imbalance')
plt.legend()

plt.subplot(1, 3, 3)
plt.scatter(features['A'][:,1], features['A'][:,2], label='TX A', alpha=0.6, s=20)
plt.scatter(features['B'][:,1], features['B'][:,2], label='TX B', alpha=0.6, s=20)
plt.xlabel('Gain imbalance (dB)')
plt.ylabel('Phase imbalance (°)')
plt.title('Feature Space: Gain vs Phase Imbalance')
plt.legend()

plt.tight_layout()
plt.savefig('lab4/feature_space.png', dpi=150)

Part B: Distance-Based Classifier (20 min)

#!/usr/bin/env python3
"""Lab 4 Part B: RF fingerprint classifier."""
import numpy as np

def classify(feature_vector, centroid_A, centroid_B, scale):
    """Classify feature vector based on nearest centroid (scaled Euclidean)."""
    f = feature_vector / scale  # normalize each feature dimension
    d_A = np.linalg.norm(f - centroid_A / scale)
    d_B = np.linalg.norm(f - centroid_B / scale)
    return 'A' if d_A <= d_B else 'B'

# Split features: first 40 for training, last 10 for test
train_A = features['A'][:40]
train_B = features['B'][:40]
test_A = features['A'][40:]
test_B = features['B'][40:]

# Compute centroids
centroid_A = np.mean(train_A, axis=0)
centroid_B = np.mean(train_B, axis=0)

# Scale by feature std for normalization
all_train = np.vstack([train_A, train_B])
scale = np.std(all_train, axis=0) + 1e-10

# Evaluate on test set
correct = 0
total = len(test_A) + len(test_B)
for f in test_A:
    if classify(f, centroid_A, centroid_B, scale) == 'A':
        correct += 1
for f in test_B:
    if classify(f, centroid_A, centroid_B, scale) == 'B':
        correct += 1

print(f"Classification accuracy at SNR=20 dB: {correct}/{total} = {correct/total*100:.1f}%")

# Accuracy vs. SNR
snr_levels = [-5, 0, 5, 10, 15, 20]
n_test_per_tx = 100
accuracy_vs_snr = []

for snr_db in snr_levels:
    correct = 0
    total = n_test_per_tx * 2
    
    for tx_id, params in tx_params.items():
        for _ in range(n_test_per_tx):
            rx = generate_tx(
                params['cfo_hz'], params['gain_db'], params['phase_deg'],
                N_samples, fs, pilot, snr_db
            )
            cfo_est = estimate_cfo(rx, pilot, fs)
            g_est, p_est = estimate_iq_imbalance(rx)
            f_vec = np.array([cfo_est, g_est, p_est])
            predicted = classify(f_vec, centroid_A, centroid_B, scale)
            if predicted == tx_id:
                correct += 1
    
    accuracy = correct / total
    accuracy_vs_snr.append(accuracy)
    print(f"SNR={snr_db:3d} dB: accuracy = {accuracy*100:.1f}%")

import matplotlib.pyplot as plt
plt.figure(figsize=(8, 5))
plt.plot(snr_levels, [a*100 for a in accuracy_vs_snr], 'b-o')
plt.axhline(50, color='r', linestyle='--', label='Chance level (50%)')
plt.axhline(90, color='g', linestyle='--', label='90% target')
plt.xlabel('SNR (dB)')
plt.ylabel('Classification accuracy (%)')
plt.title('RF Fingerprinting Accuracy vs. SNR')
plt.legend()
plt.grid(True)
plt.savefig('lab4/accuracy_vs_snr.png', dpi=150)

Lab Report

Create lab-4-report.md with:

  1. Feature statistics table for TX A and TX B at SNR=20 dB (CFO, gain imbalance, phase imbalance)
  2. Feature space scatter plots (3 panels from Part A)
  3. Classification accuracy at SNR=20 dB
  4. Accuracy vs. SNR curve; identify the SNR at which accuracy drops below 90%
  5. Analysis (150 words): "An IMSI catcher clones a legitimate gNB's MAC address and protocol messages but uses a different SDR hardware platform. Explain how RF fingerprinting could detect it. What SNR would be required at the UE receiver to achieve 90% detection accuracy? What limitation would make this detection unreliable in a dense urban environment?"

Grading

Component Points
Feature extraction: CFO, gain, phase correctly estimated 5
Feature space: two transmitters visually separable 3
Classifier: accuracy vs. SNR curve with 90% breakpoint 5
Analysis: IMSI catcher detection argument 2
Total 15