In [2]:
import sqlite3
import pandas as pd
import csv as c
import seaborn
import matplotlib.pyplot as pp
/home/biom/anaconda3/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [3]:
conn = sqlite3.connect('testing.db')
c = conn.cursor()

data = pd.read_sql('select * from data',conn)
data
Out[3]:

No data shown due to privacy

14178549 rows × 37 columns

In [3]:
import csv, sqlite3

#A script to uniform mixed countries

con = sqlite3.connect("testing.db")
cur = con.cursor()
#France
cur.execute('UPDATE data SET country = "france" WHERE country = "fra" or country = "france                                            " or country = "france, metropolitan" or country = "fr" or country = "fr     " or country = "france "')
#United States
cur.execute('UPDATE data SET country = "united states" WHERE country = "u.s.a." or country = "us" or country = "us     " or country = "usa" or country ="us minor outlying islands" or country = "us minor outlying i" or country = "united states minor outlying islands" or country = "united states minor outlying isl" or country = "united states                                     "or country = "u.s. virgin islands"')
#Canada
cur.execute('UPDATE data SET country = "canada" WHERE country = "can" or country = " canada" or country = "canada " or country = "ca" or country = "canada                                            "')
#United Kingdom
cur.execute('UPDATE data SET country = "united kingdom" WHERE country = "gb" or country = "uk" or country = "gbr" or country ="united kingdom                                    "or country = "gb     "')
#Germany
cur.execute('UPDATE data SET country = "germany" WHERE country = "de" or country = "ger" or country = "de     "')
#Italy
cur.execute('UPDATE data SET country = "italy" WHERE country = "it" or country = "italy                                             " or country = "lt     "  or country = "it     " or country = "ita" or country = "italy  "')
#Sweden
cur.execute('UPDATE data SET country = "sweden" WHERE country = "se" or country = "se     " or country = "sweden " or country = "se     "')
#Austalia
cur.execute('UPDATE data SET country = "australia" WHERE country = "au" or country = "aus" or country = "australia" or country = "australia                                         "')
#Finland
cur.execute('UPDATE data SET country = "finland" WHERE country = "fi" or country = "fi     "or country = "fi     "')
#Netherlands
cur.execute('UPDATE data SET country = "netherlands" WHERE country = "nl" or country = "netherlands   " or country = "nl     " or country = "netherlands antilles"')
#Spain
cur.execute('UPDATE data SET country = "spain" WHERE country = "es" or country = "es     "')
#Denmark
cur.execute('UPDATE data SET country = "denmark" WHERE country = "dk" or country = "dk     "')
#Columbia
cur.execute('UPDATE data SET country = "columbia" WHERE country = "col" or country = "co"')
#Norway
cur.execute('UPDATE data SET country = "norway" WHERE country = "no" or country = "no     " or country = "norway "')
#Russia
cur.execute('UPDATE data SET country = "russia" WHERE country = "russian federation" or country = "ru     " or country = "ussr" or country = "ru"')
#Austria
cur.execute('UPDATE data SET country = "austria" WHERE country = "at" or country = "at     "')
#New Zealand
cur.execute('UPDATE data SET country = "new zealand" WHERE country = "nz"')
#Hungary
cur.execute('UPDATE data SET country = "hungary" WHERE country = "hu" or country ="hu     "')
#Portugal
cur.execute('UPDATE data SET country = "portugal" WHERE country = "pt" or country = "pt     "')
#Romainia
cur.execute('UPDATE data SET country = "romainia" WHERE country = "ro" or country = "ro     "')
#Poland
cur.execute('UPDATE data SET country = "poland" WHERE country = "pl" or country = "pl     "')
#Brazil
cur.execute('UPDATE data SET country = "brazil" WHERE country = "br"')
#Ireland
cur.execute('UPDATE data SET country = "ireland" WHERE country = "ie     " or country = "ie"')
#Argentina
cur.execute('UPDATE data SET country = "argentina" WHERE country = "ar     "')
#turkey
cur.execute('UPDATE data SET country = "turkey" WHERE country = "tr" or country = "tu"')
#Bulgaria
cur.execute('UPDATE data SET country = "bulgaria" WHERE country = "bg"')
#Greece
cur.execute('UPDATE data SET country = "greece" WHERE country = "gr" or country = "gr     " or country = "greece "')
#Switzerland
cur.execute('UPDATE data SET country = "switzerland" WHERE country = "ch" or country = "ch     "')
#Japan
cur.execute('UPDATE data SET country = "japan" WHERE country = "jp" or country = "japan  "')
#Estonia
cur.execute('UPDATE data SET country = "estonia" WHERE country = "ee" or country = "ee     "')
#Latvia
cur.execute('UPDATE data SET country = "latvia" WHERE country = "lv" or country = "lv     "')
#Mexico
cur.execute('UPDATE data SET country = "mexico" WHERE country = "mx"')
#Slovakia
cur.execute('UPDATE data SET country = "slovakia" WHERE country = "sk     " or country = "sk"')
#Vietnam
cur.execute('UPDATE data SET country = "vietnam" WHERE country = "viet nam"')
#Lithuania
cur.execute('UPDATE data SET country = "lithuania" WHERE country = "lt"')
#Slovenia
cur.execute('UPDATE data SET country = "slovenia" WHERE country = "si     " or country = "si"')
#Korea
cur.execute('UPDATE data SET country = "korea" WHERE country = "korea (south)" or country = "korea, south" or country = "south korea"')
#South Africa
cur.execute('UPDATE data SET country = "south africa" WHERE country = "za"')
#India
cur.execute('UPDATE data SET country = "india" WHERE country = "in"')
#Croatia
cur.execute('UPDATE data SET country = "croatia" WHERE country = "hr"')
#Chile
cur.execute('UPDATE data SET country = "chile" WHERE country = "cl"')

con.commit()
con.close()
In [4]:
crazy = data.groupby('country').size().nlargest(20)
crazy
Out[4]:
country
united states     2174623
united kingdom    1548974
france            1375678
turkey            1091715
germany            785181
canada             777960
                   653342
australia          419767
unknown            383871
italy              372182
sweden             302669
il                 243593
netherlands        224752
null               219184
russia             158762
finland            158119
denmark             93068
spain               92982
poland              76409
greece              74358
dtype: int64
In [6]:
import sqlite3
import pandas.io.sql as sql

crazy.to_csv('export.csv')
print("Success")
Success