#! /usr/bin/python -O
#
# Standalone packet capture script derived from Packet Garden.
#
# Copyright (C) 2007 Julian Oliver 
#
# See the README.txt for dependencies and usage instructions
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import pcap
import dpkt
import time
import socket
import os 
import os.path
import sys
import gzip
import struct
import fcntl

try:
  import psyco
  psyco.full()
except ImportError:
  pass
try:
  import GeoIP
except ImportError:
  print "GeoIP module not found. there will be no country codes generated for captured IP's"
  pass

# Module-wide state shared by the capture class and the start-up code.

# Platform identifier; used to choose how the local IP address is looked up.
PLATFORM = sys.platform

# Name of this machine; it becomes part of every log file name.
hostname = socket.gethostname()

# Running byte totals, keyed by "<ip> <direction> <protocol> <region>".
dict = {}

# Boxed message printed when the capture cannot be started.
msg = '\n'.join((
  "-----------------------------------------------",
  "| Invalid device or insufficient permissions. |",
  "| Become root or try another device.          |",
  "-----------------------------------------------",
))

# packet capture class
class packet:
  """Capture TCP/UDP traffic on a device and keep per-host byte totals.

  Totals are accumulated in the module-level ``dict`` under keys of the form
  "<remote ip> <up|down> <protocol> <region>" and are periodically rewritten
  to a gzip log named pcap_<hostname>_<dd-mm-yy>.log.gz in the log directory.
  """

  # Region token logged when no country code is available (GeoIP module or
  # database missing, or address not in the database). It must be a
  # non-empty, whitespace-free token because log lines are split on
  # whitespace when read back.
  # NOTE(review): confirm downstream log consumers accept this token.
  UNKNOWN_REGION = '--'

  # Linux ioctl request number used to read an interface's IPv4 address.
  SIOCGIFADDR = 0x8915

  # The log is rewritten once more than this many TCP/UDP packets were seen.
  FLUSH_EVERY = 1000

  # Lazily created GeoIP handle, shared by all lookups of one instance.
  _geoip_db = None
  _geoip_tried = False

  def get_local_ip(self, device):
    """Return the IPv4 address of ``device`` as a dotted-quad string.

    On OS X the address is parsed from the output of ``ifconfig``; on every
    other platform the Linux SIOCGIFADDR ioctl is used.

    Raises IOError when no address can be determined.
    """
    if "darwin" in PLATFORM: # OS X
      pipe = os.popen('ifconfig ' + device)
      try:
        output = pipe.readlines()
      finally:
        pipe.close()
      for line in output:
        # Both words must be present. The previous test
        # ('"inet" and "netmask" in line') only ever checked "netmask".
        if "inet" in line and "netmask" in line:
          return line.split()[1]
      raise IOError("no IPv4 address found for device " + repr(device))
    else: # defaults to Linux
      s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
      try:
        reply = fcntl.ioctl(s.fileno(), self.SIOCGIFADDR,
                            struct.pack('256s', device[:15]))
      finally:
        s.close()
      # Bytes 20-24 of the returned structure hold the packed IPv4 address.
      return socket.inet_ntoa(reply[20:24])

  def _geo_db(self):
    """Return a cached GeoIP handle, or None when GeoIP cannot be used."""
    if not self._geoip_tried:
      self._geoip_tried = True
      try:
        self._geoip_db = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
      except Exception:
        # Covers the optional GeoIP import having failed (NameError) as well
        # as any error raised while opening the database. Capture continues
        # without country codes.
        self._geoip_db = None
    return self._geoip_db

  def geo_ip(self, ip):
    """Return the country code for ``ip``, a sequence of four octets.

    Returns UNKNOWN_REGION when GeoIP is unavailable or has no entry for the
    address, so the result can always be embedded in a log key.
    """
    gi = self._geo_db()
    if gi is None:
      return self.UNKNOWN_REGION
    code = gi.country_code_by_addr('%s.%s.%s.%s' % (ip[0], ip[1], ip[2], ip[3]))
    return code or self.UNKNOWN_REGION

  def get_ports(self):
    """Load ./config/filter.config into ``self.ports`` and ``self.groups``.

    Lines of the form "name = 80, 6881-6889" map a protocol name to a list
    of port numbers; a range "a-b" includes both a and b. Lines of the form
    "group: x, y" (without '=') map a group name to a list of strings.
    Any other line is ignored.

    Raises IOError if the file cannot be read and ValueError if a port is
    not an integer.
    """
    self.ports = {}
    self.groups = {}
    config = open(os.path.join('.', 'config', 'filter.config'))
    try:
      lines = config.readlines()
    finally:
      config.close()
    for line in lines:
      line = line.strip()
      if '=' in line:
        name, spec = line.split('=', 1)
        ports = []
        for item in spec.split(','):
          item = item.strip()
          if not item:
            continue # tolerate an empty value or a trailing comma
          if '-' in item:
            low, high = item.split('-')
            # +1 so that the upper bound of the range is included
            ports.extend(range(int(low), int(high) + 1))
          else:
            ports.append(int(item))
        self.ports[name.strip()] = ports
      elif ':' in line:
        name, spec = line.split(':', 1)
        self.groups[name.strip()] = [member.strip() for member in spec.split(',')]

  def _log_path(self, day):
    """Return the log file path for ``day`` (a 'dd-mm-yy' string)."""
    return os.path.join(self.dir, 'pcap_' + hostname + '_' + day + '.log.gz')

  def _load_log(self, path):
    """Merge the totals of an existing log file into the global table.

    A missing file is the normal first-run case and is ignored. Malformed
    lines are skipped individually instead of aborting the whole read.
    """
    try:
      old_log = gzip.open(path, 'r')
    except IOError:
      return
    try:
      try:
        for line in old_log:
          fields = line.split()
          if len(fields) != 5:
            continue
          host, direction, proto, region, length = fields
          try:
            length = int(length)
          except ValueError:
            continue
          dict[str(host) + " " + direction + " " + proto + " " + region] = length
      except Exception:
        # Truncated or corrupt archive: keep whatever was read so far.
        sys.stderr.write("warning: could not read all of " + path + "\n")
    finally:
      old_log.close()

  def _write_log(self, start):
    """Rewrite the log for the current day and return that day's string.

    If the date no longer equals ``start`` the totals are cleared and an
    empty log for the new day is created; packets counted since the last
    write are dropped at that point (same as before).
    """
    current = time.strftime('%d-%m-%y')
    if current != start:
      dict.clear()
      start = current
    log = gzip.open(self._log_path(start), 'w')
    try:
      for key, total in dict.items():
        log.write(str(key) + " " + str(total) + '\n')
    finally:
      log.close()
    return start

  def _endpoints(self, frame):
    """Extract addressing details from a decoded Ethernet frame.

    Returns (source, dest, length, source_port, dest_port), where source and
    dest are 4-tuples of ints, for TCP or UDP over IPv4 carried either
    directly in the frame or inside PPP (PPPoE connections). Returns None
    for anything else.
    """
    ip_layer = frame.data
    if not isinstance(ip_layer, dpkt.ip.IP):
      # PPPoE: the IP packet sits inside the PPP layer two levels down.
      ppp = getattr(ip_layer, 'data', None)
      if not isinstance(ppp, dpkt.ppp.PPP):
        return None
      ip_layer = ppp.data
      if not isinstance(ip_layer, dpkt.ip.IP):
        return None
    transport = ip_layer.data
    # we're only interested in UDP and TCP packets
    if not isinstance(transport, (dpkt.tcp.TCP, dpkt.udp.UDP)):
      return None
    return (struct.unpack('4B', ip_layer.src),
            struct.unpack('4B', ip_layer.dst),
            ip_layer.len,
            transport.sport,
            transport.dport)

  def capture(self, device, dir):
    """Capture on ``device`` and log totals into directory ``dir``.

    Runs until the packet source is exhausted or the capture is interrupted
    from the keyboard; in both cases the log is written one last time
    before returning. Errors from opening the device, reading the port
    configuration or determining the local address propagate to the caller.
    """
    self.dir = dir
    self.device = device
    self.get_ports()
    # Sets give constant-time port membership tests in the per-packet loop.
    port_sets = [(name, frozenset(ports)) for name, ports in self.ports.items()]

    start = time.strftime('%d-%m-%y')
    # read in old log if it exists.
    # can't just append to it as we need to collate with existing entries.
    self._load_log(self._log_path(start))

    p = pcap.pcap(self.device)
    ip = tuple([int(n) for n in self.get_local_ip(self.device).split('.')])
    local_net = ip[:3]
    count = 0
    try:
      try:
        for ts, pkt in p:
          try:
            fields = self._endpoints(dpkt.ethernet.Ethernet(pkt))
          except (KeyboardInterrupt, SystemExit):
            raise
          except Exception:
            continue # undecodable frame: skip it
          if fields is None:
            continue
          source, dest, length, source_port, dest_port = fields
          count += 1

          # filter and tag on direction; traffic that does not involve this
          # host cannot be tagged and is not logged.
          if source == ip:
            direction, remote = 'up', dest
          elif dest == ip:
            direction, remote = 'down', source
          else:
            continue
          # don't log traffic exchanged with hosts on the local network
          # (same first three octets as the local address).
          if remote[:3] == local_net:
            continue

          # filter and tag on port; the first matching protocol wins.
          proto = 'unknown'
          for name, ports in port_sets:
            if source_port in ports or dest_port in ports:
              proto = name
              break

          # fast collation using key matching. avoids expensive iterations.
          remote_ip = '%s.%s.%s.%s' % remote
          key = remote_ip + " " + direction + " " + proto + " " + self.geo_ip(remote)
          dict[key] = dict.get(key, 0) + length

          if count > self.FLUSH_EVERY:
            count = 0
            t = time.time()
            try:
              start = self._write_log(start)
            except (IOError, OSError):
              # Keep capturing; the next periodic write will try again.
              sys.stderr.write("warning: could not write log: %s\n"
                               % (sys.exc_info()[1],))
              continue
            total = time.time() - t
            print("processed  %s networks in  %s seconds" % (len(dict), total))
      except KeyboardInterrupt:
        pass # stop quietly; the final write below still happens
    finally:
      self._write_log(start)

# Single capture instance driven by the start-up code below.
cap = packet()

# Expected arguments: <device> <log directory>
if len(sys.argv) < 3:
  sys.stderr.write("usage: %s <device> <log directory>\n" % sys.argv[0])
  print(msg)
  raise SystemExit(1)

# str.strip() returns a new string, so the result has to be kept.
device = sys.argv[1].strip()
dir = sys.argv[2].strip()

try:
  cap.capture(device, dir)
except KeyboardInterrupt:
  # A keyboard interrupt is a normal way to stop capturing, not a failure.
  raise SystemExit(0)
except Exception:
  # Report the underlying error as well, so that e.g. a missing filter
  # configuration is not mistaken for a permissions problem.
  sys.stderr.write("capture failed: %s\n" % (sys.exc_info()[1],))
  print(msg)
  raise SystemExit(1)

