# 小郑之家~

### 这个paper的主要内容

• 网络结构

MDNet 是用 CNN 来做目标跟踪的一个经典之作，网络结构比较简单：三个卷积层后面接上两个全连接层，最后再接上 Domain-specific Layers，代码如下：


# Shared part of the network: three convolutional stages (conv1-conv3)
# followed by two fully-connected stages (fc4, fc5), registered under
# those names so they can be addressed individually.
shared_stages = OrderedDict([
    ('conv1', nn.Sequential(
        nn.Conv2d(3, 96, kernel_size=7, stride=2),
        nn.ReLU(),
        LRN(),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )),
    ('conv2', nn.Sequential(
        nn.Conv2d(96, 256, kernel_size=5, stride=2),
        nn.ReLU(),
        LRN(),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )),
    ('conv3', nn.Sequential(
        nn.Conv2d(256, 512, kernel_size=3, stride=1),
        nn.ReLU(),
    )),
    # 512 * 3 * 3 input features: presumably the flattened conv3 output
    # (512 channels on a 3x3 map) -- TODO confirm against the input size.
    ('fc4', nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(512 * 3 * 3, 512),
        nn.ReLU(),
    )),
    ('fc5', nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(512, 512),
        nn.ReLU(),
    )),
])
self.layers = nn.Sequential(shared_stages)

# K parallel heads, each Dropout -> Linear(512, 2), i.e. one 2-way output
# per head (presumably one head per training domain -- confirm what K is).
self.branches = nn.ModuleList([
    nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 2))
    for _ in range(K)
])



• gt

pos_score 和 neg_score 的 shape 分别是 (36, 2) 和 (96, 2)。因为需要判断 target 和 background 两类，所以输出的最后一维是 2。然后用这两个去做 BCE：具体是用正样本的第二列和负样本的第一列来计算，两者加起来就是最终的 loss 函数。

• 具体是如何产生正的和负的 bbox 的？

# Clamp the first two columns of every sample so the boxes stay in the image.
# NOTE(review): the bounds below only make sense if columns 0-1 hold the box
# centre and columns 2-3 its size at this point -- confirm in the caller.
if self.valid:
    # Keep the position at least half a box size away from each image
    # border (and one pixel short of the far border).
    samples[:, :2] = np.clip(
        samples[:, :2],
        samples[:, 2:] / 2,
        self.img_size - samples[:, 2:] / 2 - 1,
    )
else:
    # Only require the position itself to lie within [0, img_size].
    samples[:, :2] = np.clip(samples[:, :2], 0, self.img_size)


n_pos, n_neg = 4, 12



def gen_samples(generator, bbox, n, overlap_range=None, scale_range=None):
    """Draw up to ``n`` samples around ``bbox`` that satisfy optional filters.

    Args:
        generator: Callable ``generator(bbox, count)`` returning a 2-D array
            of candidate boxes, one per row, with the box size in columns 2+.
        bbox: Reference box; ``bbox[2:]`` is treated as its size.
        n: Number of samples wanted.
        overlap_range: Optional ``(low, high)`` inclusive bounds on the
            overlap between each candidate and ``bbox`` (as computed by
            ``overlap_ratio``).
        scale_range: Optional ``(low, high)`` inclusive bounds on the ratio
            of candidate area to reference area.

    Returns:
        With no filters, whatever ``generator(bbox, n)`` returns.  With a
        filter, an array of accepted candidates with **at most** ``n`` rows:
        candidates are over-generated with factors 2, 4 and 8 of the number
        still missing, and the loop stops after those three rounds even if
        fewer than ``n`` passed.  If ``n <= 0`` the loop never runs and
        ``None`` is returned.
    """
    if overlap_range is None and scale_range is None:
        return generator(bbox, n)

    samples = None
    remain = n
    factor = 2
    while remain > 0 and factor < 16:
        # Over-generate: ask for `factor` times as many candidates as are
        # still missing, since some will be filtered out below.
        samples_ = generator(bbox, remain * factor)

        # Start with every candidate accepted, then apply each filter.
        idx = np.ones(len(samples_), dtype=bool)
        if overlap_range is not None:
            # Keep candidates whose overlap with bbox lies in the range.
            r = overlap_ratio(samples_, bbox)
            idx *= (r >= overlap_range[0]) * (r <= overlap_range[1])
        if scale_range is not None:
            # Keep candidates whose area ratio to bbox lies in the range.
            s = np.prod(samples_[:, 2:], axis=1) / np.prod(bbox[2:])
            idx *= (s >= scale_range[0]) * (s <= scale_range[1])

        # Take the accepted candidates, but no more than are still needed.
        samples_ = samples_[idx, :]
        samples_ = samples_[:min(remain, len(samples_))]
        if samples is None:
            samples = samples_
        else:
            samples = np.concatenate([samples, samples_])

        # If this round did not yield enough, retry with a larger factor.
        remain = n - len(samples)
        factor = factor * 2

    return samples



• 有个问题是怎么保证每次都能产生给定数量的正的样本和负的样本？

### 如何进行推理的

• Ridge回归部分具体是怎样的？

# Train bbox regressor
# Draw candidate boxes around the target, filtered by the configured
# overlap and scale ranges.
bbreg_generator = SampleGenerator('uniform', image.size, 0.3, 1.5, 1.1)
bbreg_examples = gen_samples(
    bbreg_generator,
    target_bbox,
    opts['n_bbreg'],
    opts['overlap_bbreg'],
    opts['scale_bbreg'],
)
# Run the sampled boxes through the model (presumably yielding one feature
# vector per box -- confirm in forward_samples).
bbreg_feats = forward_samples(model, image, bbreg_examples)
# Fit the regressor from those features to the target box.
bbreg = BBRegressor(image.size)
bbreg.train(bbreg_feats, bbreg_examples, target_bbox)



def get_examples(self, bbox, gt):
    """Build regression targets that map each box in ``bbox`` onto ``gt``.

    Both inputs are 2-D arrays whose columns 0-1 are a corner position and
    columns 2-3 a size (half the size is added to reach the centre).  ``gt``
    may have a single row, which is broadcast against all rows of ``bbox``.

    Unlike an in-place implementation, this does not modify the caller's
    arrays, and integer inputs do not have their centres truncated.

    Returns:
        Array with four columns per box: the centre offset divided by the
        box size (2 columns), then the log of the size ratio gt / bbox
        (2 columns).
    """
    bbox = np.asarray(bbox)
    gt = np.asarray(gt)

    bbox_wh = bbox[:, 2:]
    gt_wh = gt[:, 2:]

    # Move from corner coordinates to centre coordinates (on new arrays).
    bbox_center = bbox[:, :2] + bbox_wh / 2
    gt_center = gt[:, :2] + gt_wh / 2

    dst_xy = (gt_center - bbox_center) / bbox_wh
    dst_wh = np.log(gt_wh / bbox_wh)
    return np.concatenate((dst_xy, dst_wh), axis=1)



def train(self, X, bbox, gt):
    """Fit ``self.model`` to predict box corrections from features.

    Args:
        X: Per-box features; must support ``.cpu().numpy()`` (e.g. a torch
            tensor), with one row per row of ``bbox``.
        bbox: 2-D array of candidate boxes, size in columns 2-3.
        gt: Ground-truth box, either 1-D or a single-row 2-D array.

    Only candidates whose overlap with ``gt`` lies in ``self.overlap_range``
    and whose area ratio to ``gt`` lies in ``self.scale_range`` (both
    inclusive) are used for fitting.
    """
    X = X.cpu().numpy()
    # Work on copies so the caller's arrays are never modified below.
    bbox = np.copy(bbox)
    gt = np.copy(gt)

    # Normalise a single 1-D box to shape (1, 4)-style 2-D.
    if gt.ndim == 1:
        gt = gt[None, :]

    r = overlap_ratio(bbox, gt)  # overlap (IoU) of each candidate with gt
    s = np.prod(bbox[:, 2:], axis=1) / np.prod(gt[0, 2:])  # area ratio
    # Select candidates whose overlap and scale are both within range.
    idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \
          (s >= self.scale_range[0]) * (s <= self.scale_range[1])
    # Keep the features of the selected candidates ...
    X = X[idx]
    # ... and their boxes.
    bbox = bbox[idx]

    # Y plays the role of the regression targets ("gt") in Faster R-CNN.
    Y = self.get_examples(bbox, gt)

    # Treat it as a plain fitting problem: features -> targets.
    self.model.fit(X, Y)



• target是如何进行更新的?

• 如果当前的这一帧没有跟踪到怎么办？