Source code for workbench.clients.pcap_bro_urls
"""This client gets extracts URLs from PCAP files (via Bro logs)."""
import zerorpc
import os
import pprint
import client_helper
[docs]def run():
"""This client gets extracts URLs from PCAP files (via Bro logs)."""
# Grab server args
args = client_helper.grab_server_args()
# Start up workbench connection
workbench = zerorpc.Client(timeout=300, heartbeat=60)
workbench.connect('tcp://'+args['server']+':'+args['port'])
# Loop through all the pcaps and collect a set of urls(hosts) from the http_log files
urls = set()
data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pcap')
file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)]
for filename in file_list:
# Skip OS generated files
if '.DS_Store' in filename: continue
with open(filename,'rb') as f:
base_name = os.path.basename(filename)
pcap_md5 = workbench.store_sample(f.read(), base_name, 'pcap')
results = workbench.work_request('pcap_bro', pcap_md5)
# Just grab the http log
if 'http_log' in results['pcap_bro']:
log_md5 = results['pcap_bro']['http_log']
http_data = workbench.stream_sample(log_md5) # None Means all data
urls = set( row['host'] for row in http_data)
print '<<< %s >>>' % filename
pprint.pprint(list(urls))
print
[docs]def test():
"""Exexutes pcap_bro_urls test."""
run()
if __name__ == '__main__':
run()