Week: 5 -- RF Security Primitives
Points: 15
Time estimate: 60 min lab + 2 hr independent
Deliverable: lab-4-report.md + Python classifier
Objectives
- Capture transmissions from two same-make SDR units (or two channels of one ANT-SDR E200); extract CFO and I/Q imbalance as fingerprinting features.
- Build a simple distance-based classifier that distinguishes the two units from their RF fingerprint.
- Evaluate classification accuracy at multiple SNR levels.
Setup
Option A (two hardware units): Two ANT-SDR E200 units, or one HackRF transmitting and one ANT-SDR E200 receiving. Transmit a known pilot signal from each unit in turn; record the captured IQ from the receiver.
Option B (simulated): Use the I/Q imbalance model from Week 4 lecture to simulate two transmitters with different hardware parameters. This option is valid if only one SDR is available.
Part A: Feature Extraction (25 min)
#!/usr/bin/env python3
"""Lab 4 Part A: RF fingerprinting feature extraction."""
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import correlate
# ---- Pilot-based CFO estimation ----
def estimate_cfo(samples, pilot, fs):
"""Estimate carrier frequency offset from pilot correlation."""
corr = correlate(samples[:4*len(pilot)], pilot, mode='valid')
peak_idx = np.argmax(np.abs(corr))
if peak_idx + len(pilot) < len(samples):
seg1 = samples[peak_idx:peak_idx+len(pilot)]
seg2 = samples[peak_idx+len(pilot):peak_idx+2*len(pilot)]
if len(seg2) == len(pilot):
corr1 = np.sum(seg1 * np.conj(pilot))
corr2 = np.sum(seg2 * np.conj(pilot))
phase_delta = np.angle(corr2 * np.conj(corr1))
return phase_delta * fs / (2 * np.pi * len(pilot))
return np.angle(corr[np.argmax(np.abs(corr))]) * fs / (2 * np.pi * len(pilot))
# ---- I/Q imbalance estimation ----
def estimate_iq_imbalance(x):
"""Estimate gain and phase imbalance from signal statistics."""
I = x.real
Q = x.imag
E_II = np.mean(I**2)
E_QQ = np.mean(Q**2)
E_IQ = np.mean(I*Q)
gain_db = 20*np.log10(np.sqrt(E_II/(E_QQ + 1e-12)))
phase_deg = np.degrees(np.arcsin(np.clip(2*E_IQ/(E_II+E_QQ), -1, 1)))
return gain_db, phase_deg
# ---- Simulate two SDR transmitters ----
np.random.seed(42)
fs = 1e6
N_samples = 10000
# Pilot: Zadoff-Chu sequence (same as LTE preamble -- good autocorrelation)
def zadoff_chu(N, u=1):
"""Zadoff-Chu sequence of length N with root u."""
n = np.arange(N)
if N % 2 == 0:
return np.exp(-1j * np.pi * u * n * (n) / N)
else:
return np.exp(-1j * np.pi * u * n * (n+1) / N)
pilot = zadoff_chu(64) # 64-sample pilot
# Transmitter A: CFO=+1.2 kHz, gain_imb=0.4 dB, phase_imb=1.8°
# Transmitter B: CFO=-0.7 kHz, gain_imb=-0.2 dB, phase_imb=3.1°
tx_params = {
'A': {'cfo_hz': 1200.0, 'gain_db': 0.4, 'phase_deg': 1.8},
'B': {'cfo_hz': -700.0, 'gain_db': -0.2, 'phase_deg': 3.1},
}
def generate_tx(cfo_hz, gain_db, phase_deg, N, fs, pilot, snr_db):
"""Generate received signal from a transmitter with given hardware params."""
# Repeat pilot N times with random data in between
signal = np.tile(pilot, N // len(pilot) + 1)[:N]
# Apply CFO
t = np.arange(N) / fs
signal = signal * np.exp(2j * np.pi * cfo_hz * t)
# Apply I/Q imbalance
A = 10**(gain_db/20)
phi = np.deg2rad(phase_deg)
I = signal.real
Q = signal.imag
signal = I + 1j * (A * np.cos(phi) * Q + A * np.sin(phi) * I)
# Add noise
snr_linear = 10**(snr_db/10)
signal_power = np.mean(np.abs(signal)**2)
noise_std = np.sqrt(signal_power / (2 * snr_linear))
noise = noise_std * (np.random.randn(N) + 1j*np.random.randn(N))
return signal + noise
# Extract features at SNR = 20 dB (high SNR baseline)
snr_db = 20.0
n_captures = 50 # 50 captures per transmitter
features = {'A': [], 'B': []}
for tx_id, params in tx_params.items():
for _ in range(n_captures):
rx = generate_tx(
params['cfo_hz'], params['gain_db'], params['phase_deg'],
N_samples, fs, pilot, snr_db
)
cfo_est = estimate_cfo(rx, pilot, fs)
g_est, p_est = estimate_iq_imbalance(rx)
features[tx_id].append([cfo_est, g_est, p_est])
features = {k: np.array(v) for k, v in features.items()}
print("Feature statistics (mean ± std) at SNR=20 dB:")
for tx_id in ['A', 'B']:
f = features[tx_id]
print(f"\n TX {tx_id}:")
print(f" CFO: {np.mean(f[:,0]):8.1f} ± {np.std(f[:,0]):.1f} Hz")
print(f" Gain imb: {np.mean(f[:,1]):8.3f} ± {np.std(f[:,1]):.3f} dB")
print(f" Phase imb: {np.mean(f[:,2]):8.3f} ± {np.std(f[:,2]):.3f}°")
# Feature space visualization
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.scatter(features['A'][:,0], features['A'][:,1], label='TX A', alpha=0.6, s=20)
plt.scatter(features['B'][:,0], features['B'][:,1], label='TX B', alpha=0.6, s=20)
plt.xlabel('CFO (Hz)')
plt.ylabel('Gain imbalance (dB)')
plt.title('Feature Space: CFO vs Gain Imbalance')
plt.legend()
plt.subplot(1, 3, 2)
plt.scatter(features['A'][:,0], features['A'][:,2], label='TX A', alpha=0.6, s=20)
plt.scatter(features['B'][:,0], features['B'][:,2], label='TX B', alpha=0.6, s=20)
plt.xlabel('CFO (Hz)')
plt.ylabel('Phase imbalance (°)')
plt.title('Feature Space: CFO vs Phase Imbalance')
plt.legend()
plt.subplot(1, 3, 3)
plt.scatter(features['A'][:,1], features['A'][:,2], label='TX A', alpha=0.6, s=20)
plt.scatter(features['B'][:,1], features['B'][:,2], label='TX B', alpha=0.6, s=20)
plt.xlabel('Gain imbalance (dB)')
plt.ylabel('Phase imbalance (°)')
plt.title('Feature Space: Gain vs Phase Imbalance')
plt.legend()
plt.tight_layout()
plt.savefig('lab4/feature_space.png', dpi=150)
Part B: Distance-Based Classifier (20 min)
#!/usr/bin/env python3
"""Lab 4 Part B: RF fingerprint classifier."""
import numpy as np
def classify(feature_vector, centroid_A, centroid_B, scale):
"""Classify feature vector based on nearest centroid (scaled Euclidean)."""
f = feature_vector / scale # normalize each feature dimension
d_A = np.linalg.norm(f - centroid_A / scale)
d_B = np.linalg.norm(f - centroid_B / scale)
return 'A' if d_A <= d_B else 'B'
# Split features: first 40 for training, last 10 for test
train_A = features['A'][:40]
train_B = features['B'][:40]
test_A = features['A'][40:]
test_B = features['B'][40:]
# Compute centroids
centroid_A = np.mean(train_A, axis=0)
centroid_B = np.mean(train_B, axis=0)
# Scale by feature std for normalization
all_train = np.vstack([train_A, train_B])
scale = np.std(all_train, axis=0) + 1e-10
# Evaluate on test set
correct = 0
total = len(test_A) + len(test_B)
for f in test_A:
if classify(f, centroid_A, centroid_B, scale) == 'A':
correct += 1
for f in test_B:
if classify(f, centroid_A, centroid_B, scale) == 'B':
correct += 1
print(f"Classification accuracy at SNR=20 dB: {correct}/{total} = {correct/total*100:.1f}%")
# Accuracy vs. SNR
snr_levels = [-5, 0, 5, 10, 15, 20]
n_test_per_tx = 100
accuracy_vs_snr = []
for snr_db in snr_levels:
correct = 0
total = n_test_per_tx * 2
for tx_id, params in tx_params.items():
for _ in range(n_test_per_tx):
rx = generate_tx(
params['cfo_hz'], params['gain_db'], params['phase_deg'],
N_samples, fs, pilot, snr_db
)
cfo_est = estimate_cfo(rx, pilot, fs)
g_est, p_est = estimate_iq_imbalance(rx)
f_vec = np.array([cfo_est, g_est, p_est])
predicted = classify(f_vec, centroid_A, centroid_B, scale)
if predicted == tx_id:
correct += 1
accuracy = correct / total
accuracy_vs_snr.append(accuracy)
print(f"SNR={snr_db:3d} dB: accuracy = {accuracy*100:.1f}%")
import matplotlib.pyplot as plt
plt.figure(figsize=(8, 5))
plt.plot(snr_levels, [a*100 for a in accuracy_vs_snr], 'b-o')
plt.axhline(50, color='r', linestyle='--', label='Chance level (50%)')
plt.axhline(90, color='g', linestyle='--', label='90% target')
plt.xlabel('SNR (dB)')
plt.ylabel('Classification accuracy (%)')
plt.title('RF Fingerprinting Accuracy vs. SNR')
plt.legend()
plt.grid(True)
plt.savefig('lab4/accuracy_vs_snr.png', dpi=150)
Lab Report
Create lab-4-report.md with:
- Feature statistics table for TX A and TX B at SNR=20 dB (CFO, gain imbalance, phase imbalance)
- Feature space scatter plots (3 panels from Part A)
- Classification accuracy at SNR=20 dB
- Accuracy vs. SNR curve; identify the SNR at which accuracy drops below 90%
- Analysis (150 words): "An IMSI catcher clones a legitimate gNB's MAC address and protocol messages but uses a different SDR hardware platform. Explain how RF fingerprinting could detect it. What SNR would be required at the UE receiver to achieve 90% detection accuracy? What limitation would make this detection unreliable in a dense urban environment?"
Grading
| Component | Points |
|---|---|
| Feature extraction: CFO, gain, phase correctly estimated | 5 |
| Feature space: two transmitters visually separable | 3 |
| Classifier: accuracy vs. SNR curve with 90% breakpoint | 5 |
| Analysis: IMSI catcher detection argument | 2 |
| Total | 15 |