#!/bin/python ###--# Imports #--### import re, httplib from sys import argv,exit ###--# Constants #--### MODE_FILE = 0 # Read data from file MODE_URL = 1 # Read data from a website MODE_GROUP = 2 # Read data from Furc's site form specific groupname PHASE_LINKS = 0 # Group link acquirement phase PHASE_NAMES = 1 # Group memeber acquirement phase GRP_ASSOC = 0 # Associate in charge GRP_HEAD = 1 # Group head GRP_ELDER = 2 # Group elder GRP_MEMBER = 3 # Group member ###--# Checking Syntax #--### if len(argv) < 2: print 'Syntax: %s [f|u|g] ' exit(1) elif len(argv) < 3: mode = MODE_GROUP data = argv[1] else: if argv[1][0] == 'f': mode = MODE_FILE elif argv[1][0] == 'u': mode = MODE_URL else: mode = MODE_GROUP data = argv[2] ###--# Regular Expressions #--### matchGroupURL = re.compile( '([^<]*)' ) matchName = re.compile( ']*>([^<]+)' ) ###--# Variable Initialization #--### useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' rooturl = 'http://furcadia.com/services/beekins/listbeekins.php4' groups = [] # Group names and links names = [[],[],[],[]] # Name list for each group, divided into access level nNames = 0 # Total amount of names defTitle = 'Furcadia: Beekin Volunteer Listing' group = 0 phase = PHASE_LINKS ###--# Buffer Storage #--### print '---++ Beekin List Parser ++---\n' print '[+] Acquiring beekin list...' if mode == MODE_FILE: buffer = file( data, 'r' ).read() elif mode == MODE_URL: url = data elif mode == MODE_GROUP: url = rooturl + '?group=' + data.lower() else: print '[E] Unknown acquisition mode - ' + str(mode) exit(2) if mode == MODE_URL or mode == MODE_GROUP: # Figure out whether we start from beginning or after http:// if url[:7] == 'http://': idx = 7 else: idx = 0 # Set the address we're connecting to try: addr = url[idx:url[7:].index( '/' )+7] except: addr = url[idx:] print '[+] Connecting to %s...' % addr conn = httplib.HTTPConnection( addr ) conn.connect () conn.putrequest ( 'GET', url ) conn.putheader ( 'User-Agent', useragent ) conn.endheaders () print '[+] Getting data...' ret = conn.getresponse() if ret.status != 200: print '[E] Bad status:', str(ret.status), ret.reason exit(3) buffer = ret.read() ret.close() del(ret) del(conn) ###--# Getting Site Title #--### title = re.findall( '(.*)', buffer )[0] if title != defTitle: print '[!] WARNING: Title does not match the common one. Current: ' + title print '[+] Title: ' + title ###--# Parsing Buffer #--### for line in buffer.split('\n'): if phase == PHASE_LINKS: # Did we leave the phase yet? # if line[:7] == '


': phase = PHASE_NAMES continue # Is it a group link? # else: ret = re.findall( matchGroupURL, line ) if ret != []: # print '[D] New Group: %s (%s)' % ( ret[0][0],ret[0][1] ) groups.append( ret[0] ) elif phase == PHASE_NAMES: # Is it a name? # ret = re.findall( matchName, line ) if ret != [] and ret[0] != ' ': # print '[D] New Name: ' + ret[0] names[group].append( ret[0] ) nNames += 1 continue # Is it a new group marker? # elif line[:7] == '


': # print '[D] Switching groups!' group += 1 ###--# Finalization #--### def grpPrint( groupno ): for name in names[groupno]: print ' > ' + name.replace( '|',' ' ) print '\n---++ Summary ++---\n' print '[#] Groups Found (%d):' % len( groups ) for url,name in groups: print ' > ' + name print '[#] Group Associates (%d):' % len( names[GRP_ASSOC] ) grpPrint( GRP_ASSOC ) print '[#] Group Heads (%d):' % len( names[GRP_HEAD] ) grpPrint( GRP_HEAD ) print '[#] Group Elders (%d):' % len( names[GRP_ELDER] ) grpPrint( GRP_ELDER ) print '[#] Group Members (%d):' % len( names[GRP_MEMBER] ) grpPrint( GRP_MEMBER ) print '[#] Total Personnel: ' + str( nNames ) ###--# END OF SCRIPT #--###