In [2]:
import sqlite3
import pandas as pd
import csv as c
import seaborn
import matplotlib.pyplot as pp
/home/biom/anaconda3/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [3]:
import csv
import sqlite3
#Creating a new table to place DISTINCT values into From DATA, Removing duplicate records

con = sqlite3.connect("testing.db")
cur = con.cursor()
cur.execute("CREATE TABLE dupes_removed (id INTEGER PRIMARY KEY,first_name varchar(50),"
            "last_name varchar(50),email varchar(50),ip varchar(50),"
            "sex varchar(50),title varchar(50),birth_date varchar(50),address varchar(100)," 
            "city varchar(50),state varchar(50),country varchar(50),zip_postal varchar(20),"
            "phone varchar(50),work_phone varchar(50),mobile_phone varchar(50),alt_phone varchar(50),"
            "language varchar(20),signup_date varchar(20),last_login_date varchar(20),blocked varchar(20),"
            "casino_brand varchar(50),fav_game varchar(50),account_type varchar(50),currency varchar(50),"
            "net_loss float(10,2),first_deposit_date float(10,2),number_of_deposits int(10),last_deposit_date varchar(20),"
            "total_deposit_amount float(10,2),total_bonus_amount float(10,2),player_class varchar(20),address_2 varchar(20),user_name varchar(20),net_rev float(10,2),address_3 varchar(255),client_platform varchar(50));")

con.commit()
con.close()
In [4]:
import csv, sqlite3

#A script to remove duplicate lines

con = sqlite3.connect("testing.db")
cur = con.cursor()
#Fill Null
cur.execute('insert into dupes_removed(first_name,last_name,email,ip,sex,title,birth_date,address,city,state,country,zip_postal,phone,work_phone,mobile_phone,alt_phone,language,signup_date,last_login_date,blocked,casino_brand,fav_game,account_type,currency,net_loss,first_deposit_date,number_of_deposits,last_deposit_date,total_deposit_amount,total_bonus_amount,player_class,address_2,user_name,net_rev,address_3,client_platform) select distinct first_name,last_name,email,ip,sex,title,birth_date,address,city,state,country,zip_postal,phone,work_phone,mobile_phone,alt_phone,language,signup_date,last_login_date,blocked,casino_brand,fav_game,account_type,currency,net_loss,first_deposit_date,number_of_deposits,last_deposit_date,total_deposit_amount,total_bonus_amount,player_class,address_2,user_name,net_rev,address_3,client_platform from data')

con.commit()
con.close()
In [5]:
conn = sqlite3.connect('testing.db')
c = conn.cursor()

data = pd.read_sql('select * from dupes_removed',conn)
data
Out[5]:
id first_name last_name email ip sex title birth_date address city ... number_of_deposits last_deposit_date total_deposit_amount total_bonus_amount player_class address_2 user_name net_rev address_3 client_platform

No data shown due to privacy

11401072 rows × 37 columns

In [6]:
#A Script to query for a specific unique ID
import sqlite3

# open existing database
conn = sqlite3.connect('testing.db')
c = conn.cursor()

# print all lines ordered by ID decending.
for row in c.execute('SELECT * FROM dupes_removed ORDER BY id DESC LIMIT 10'):
    print(row[0],row[1],row[2],row[3],row[11],row[12],row[5],row[6],row[7])
No Data shown