#!/usr/bin/python
# -*- coding: ISO-8859-1 -*-
# Name: getData
# File: getData.py
# Version: 1.0
# Of: 11/12/2016
# Author: Marek Küthe
###################################
#  Please do not edit the code!   #
#                                 #
#  To use it from other code:     #
#  1. Import the module getData   #
#    from getData import *        #
#  2. Call the function getData   #
#    getData(website, path)       #
#                                 #
#   For more info please email:   #
#   webmaster[aet]test.mk16.de    #
###################################
############ STOP EDIT ############
###################################

import re;
import httplib;
import ConfigParser;
# Module-wide settings, loaded once when the module is imported. The functions
# below read the sections 'HTTP', 'RegEx' and 'CONFIG' from it.
# 'getData.ini' is a relative path, so it is looked up in the current working
# directory. ConfigParser.read() skips files it cannot open, so a missing file
# only shows up later as an error raised by config.get().
config = ConfigParser.ConfigParser();
config.read('getData.ini');

def getWebData(website,path,ssl=False):
    """Request *path* from *website* and return the response.

    The port numbers and the HTTP method are taken from the 'HTTP' section
    of the module-level ``config`` (options 'http_port', 'https_port' and
    'method').

    website -- host to connect to; also sent as the Host header
    path    -- request path
    ssl     -- False selects a plain HTTP connection, any other value HTTPS

    Returns the list [body, status, reason].
    Errors raised by httplib or by the config lookup are propagated.
    """
    # Deliberately an equality test: only False (or 0) selects plain HTTP,
    # exactly as before; every other value selects HTTPS.
    if ssl == False:
        connection = httplib.HTTPConnection(website, int(config.get('HTTP', 'http_port')))
    else:
        connection = httplib.HTTPSConnection(website, int(config.get('HTTP', 'https_port')))
    try:
        connection.request(config.get('HTTP', 'method'), path, headers={'Host': website})
        response = connection.getresponse()
        # The body has to be read before the connection is closed.
        return [response.read(), response.status, response.reason]
    finally:
        # Always release the socket, also when the request fails.
        connection.close()

def getResult(website,path,data):
    """Follow the link found by the 'humansFind' pattern and parse its target.

    The 'RegEx'/'humansFind' pattern is applied to data[0]. Groups 1 and 2
    of its first match are used as host and path (a '/' is prepended) of a
    second document, which is fetched over plain HTTP with getWebData. The
    'RegEx'/'humansSearch' pattern is then applied to that document.

    website -- unused; kept so existing callers keep working
    path    -- unused; kept so existing callers keep working
    data    -- [body, status, reason] list as returned by getWebData

    Returns the list produced by re.findall for 'humansSearch'. If any step
    fails (no link found, download error, bad pattern, ...) the
    'CONFIG'/'error_message' string is returned instead.
    """
    try:
        link = re.findall(config.get('RegEx', 'humansFind'), data[0])[0]
        linked = getWebData(link[1], '/' + link[2])
        return re.findall(config.get('RegEx', 'humansSearch'), linked[0])
    except Exception:
        # Best effort: any ordinary failure yields the configured message.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return config.get('CONFIG', 'error_message')

def getSearch(var, search):
    """Look up *search* in a sequence of (key, value, ...) entries.

    var    -- iterable of indexable entries; entry[0] is the key and
              entry[1] the value
    search -- key to look for

    Returns the value of the first entry whose key equals *search*, or None
    when there is no such entry.
    """
    hits = (entry[1] for entry in var if entry[0] == search)
    return next(hits, None)

def getMeta(name, data):
    """Return the value paired with *name* by the 'metaFind' pattern.

    name -- key to look for; compared with the first group of every match
    data -- [body, status, reason] list as returned by getWebData; only
            the body (data[0]) is searched

    Returns the second group of the first match whose first group equals
    *name*, or None when nothing matches.
    """
    body = data[0]
    pairs = re.findall(config.get('RegEx', 'metaFind'), body)
    return next((pair[1] for pair in pairs if name == pair[0]), None)
def getRobotsInfo(name, website):
    """Collect the values of one directive from the site's robots.txt.

    '/robots.txt' is fetched from *website* over plain HTTP with getWebData
    and scanned for lines of the form '<key>: <value>'.

    name    -- directive to collect, e.g. 'Sitemap'; compared exactly
    website -- host to fetch robots.txt from

    Returns [values, count]: *values* holds every matching value followed by
    a newline, *count* is the number of matches as a string.
    If the download fails, the second ';'-separated field of the
    'CONFIG'/'not_found_robots_txt_message' option is returned instead.
    """
    try:
        body = getWebData(website, '/robots.txt')[0]
    except Exception:
        # NOTE(review): this is a plain string, not the [values, count] list
        # returned on success -- confirm that callers expect this.
        return config.get('CONFIG', 'not_found_robots_txt_message').split(';')[1]
    # '.' also matches '\r', so values from files with CRLF line endings
    # would otherwise keep a trailing carriage return.
    values = [value.rstrip('\r') + "\n"
              for key, value in re.findall(r'(.*): (.*)', body)
              if key == name]
    return [''.join(values), str(len(values))]
    
# Fragments removed from every matched script / stylesheet entry.
_JS_NOISE = ('" language="Javascript',)
_CSS_NOISE = ('" media="(min-width: 640px)', '" media="(max-width: 640px)')


def _first_match(option, text):
    """Return the first match of the 'RegEx' option *option* in *text*, or ''."""
    try:
        matches = re.findall(config.get('RegEx', option), text)
    except (ConfigParser.Error, re.error):
        # A missing or broken pattern counts as "nothing found", as before.
        return ''
    if matches:
        return matches[0]
    return ''


def _listing(options, text, noise, noun):
    """Return one match per line for the given 'RegEx' options plus a count line."""
    entries = []
    for option in options:
        for match in re.findall(config.get('RegEx', option), text):
            for fragment in noise:
                match = match.replace(fragment, '')
            entries.append(match + "\n")
    return ''.join(entries) + str(len(entries)) + ' ' + noun


def _labelled(label, value):
    """Return label + value + newline, or '' when *value* is not text (e.g. None)."""
    try:
        return label + value + "\n"
    except TypeError:
        return ''


def _robots_section(heading, info, noun):
    """Return the report section for one getRobotsInfo result, or '' if it is unusable."""
    try:
        return heading + "\n" + info[0] + info[1] + ' ' + noun + "\n"
    except (IndexError, TypeError):
        return ''


def _reversed_date(value):
    """Turn 'a/b/c' into 'c.b.a'; return None when *value* does not have that form."""
    try:
        parts = value.split('/')
        return parts[2] + '.' + parts[1] + '.' + parts[0]
    except (AttributeError, IndexError):
        return None


def getData(website, path, ssl=False, webData=False):
    """Build, print and return a text report about one web page.

    The report combines data from the page itself (title, h1 headings,
    language, meta values, icons, scripts, stylesheets), from the site's
    robots.txt (via getRobotsInfo) and from the document parsed by getResult
    (labelled 'humans.txt' in the report). All patterns come from the
    'RegEx' section of the module-level ``config``.

    website -- host name of the site
    path    -- path of the page on that host
    ssl     -- passed to getWebData when the page has to be downloaded; the
               robots.txt and getResult downloads always use plain HTTP
    webData -- [body, status, reason] list of an already downloaded page;
               with the default False the page is downloaded here

    Entries whose value could not be determined are left out of the report.
    The label spellings ('JavaScirpts', 'Last updata') are kept unchanged
    because callers may match on the text.

    Returns the report string (which is also printed).
    """
    if webData == False:
        webData = getWebData(website, path, ssl)

    result = getResult(website, path, webData)
    sitemap = getRobotsInfo('Sitemap', website)
    allow = getRobotsInfo('Allow', website)
    disallow = getRobotsInfo('Disallow', website)

    author = getMeta('author', webData)
    web_author = getMeta('web_author', webData)
    description = getMeta('description', webData)

    html = webData[0]
    h1 = re.findall(config.get('RegEx', 'h1Find'), html)
    title = re.findall(config.get('RegEx', 'titleFind'), html)
    shortcut_icon = _first_match('shortcut_iconFind', html)
    icon = _first_match('iconFind', html)

    # Title followed by all h1 headings, separated by ' - '. Previously a
    # page with h1 headings but without a title raised UnboundLocalError.
    heading_parts = list(title[:1]) + [str(heading) for heading in h1]
    try:
        if heading_parts:
            tit = ' - '.join(heading_parts)
        else:
            tit = None
    except TypeError:
        # The title match is not text (pattern with several groups).
        tit = None

    js = _listing(('JavaScriptFind',), html, _JS_NOISE, 'JavaScirpts')
    css = _listing(('CSS0Find', 'CSS1Find', 'CSS2Find'), html, _CSS_NOISE, 'CSS')

    last_updata = getSearch(result, 'Last update')
    doctype = getSearch(result, 'Doctype')
    standards = getSearch(result, 'Standards')
    languages = getSearch(result, 'Language')
    contact = getSearch(result, 'Contact')
    web_designer = getSearch(result, 'Web designer')
    chef = getSearch(result, 'Chef')

    language = re.findall(config.get('RegEx', 'HTML_LanguageFind'), html)
    try:
        language = language[0][0].split('"')[0]
    except (IndexError, AttributeError):
        # No usable match: report the raw findall result instead.
        language = str(language)

    sections = [
        _labelled('Title: ', tit),
        _labelled('Languages: ', language),
        _labelled('Author: ', author),
        _labelled('Web author: ', web_author),
        _labelled('Description: ', description),
        _labelled('Shortcut icon: ', shortcut_icon),
        _labelled('Icon: ', icon),
        'JavaScripts: ' + "\n" + js + "\n",
        'CSS: ' + "\n" + css + "\n",
        _robots_section('Sitemaps(robots.txt): ', sitemap, 'Sitemaps'),
        _robots_section('Allow(robots.txt): ', allow, 'Allow Objects'),
        _robots_section('Disallow(robots.txt): ', disallow, 'Disallow Objects'),
        _labelled('Chef(humans.txt): ', chef),
        _labelled('Last updata(humans.txt): ', _reversed_date(last_updata)),
        _labelled('Web designer(humans.txt): ', web_designer),
        _labelled('Contact(humans.txt): ', contact),
        _labelled('Languages(humans.txt): ', languages),
        _labelled('Standards(humans.txt): ', standards),
        _labelled('Document type(humans.txt): ', doctype),
    ]

    vp = ''.join(sections) + 'Next' + "\n\n"
    print(vp)
    return vp