diff --git a/inference.py b/inference.py
index ea16f56..833ddb6 100644
--- a/inference.py
+++ b/inference.py
@@ -34,6 +34,10 @@ parser.add_argument('--wav2lip_batch_size', type=int, help='Batch size for Wav2L
 parser.add_argument('--resize_factor', default=1, type=int, 
 			help='Reduce the resolution by this factor. Sometimes, best results are obtained at 480p or 720p')
 
+# Exactly four ints are required: main() unpacks args.crop as y1, y2, x1, x2, so any other count must be rejected here.
+parser.add_argument('--crop', nargs=4, type=int, default=[0, 0, -1, -1], metavar=('TOP', 'BOTTOM', 'LEFT', 'RIGHT'),
+					help='Crop video to a smaller region (top, bottom, left, right). Applied after resize_factor arg. Useful if multiple face present. -1 implies the value will be auto-inferred based on height, width')
+
 args = parser.parse_args()
 args.img_size = 96
 
@@ -180,6 +184,12 @@ def main():
 			if args.resize_factor > 1:
 				frame = cv2.resize(frame, (frame.shape[1]//args.resize_factor, frame.shape[0]//args.resize_factor))
 
+			y1, y2, x1, x2 = args.crop  # order is (top, bottom, left, right), per the --crop help text
+			if x2 == -1: x2 = frame.shape[1]  # -1 sentinel: right edge defaults to shape[1] of the (possibly resized) frame
+			if y2 == -1: y2 = frame.shape[0]  # -1 sentinel: bottom edge defaults to shape[0] of the (possibly resized) frame
+
+			frame = frame[y1:y2, x1:x2]  # first axis is sliced with the y bounds, second axis with the x bounds
+
 			full_frames.append(frame)
 
 	print ("Number of frames available for inference: "+str(len(full_frames)))