Reading from a SQLite DB into Pandas

This note demonstrates reading from an exising SQLite database.

import pandas as pd
import sqlite3

Using Bash, it is possible to print out information about the database directly in Jupyter.

%%bash
cd reading-from-sqlite-db-to-pandas
sqlite3 flights.db
.tables
airlines  airports  routes  

Connect to the Database and Read from It

The following demonstrates opening and reading from each of the tables listed above.

Airlines

conn = sqlite3.connect('reading-from-sqlite-db-to-pandas/flights.db')
airline_string = '''SELECT * 
                    FROM airlines'''
airlines = pd.read_sql(airline_string,
                       conn,
                       index_col='index')
conn.close()
%%bash
cd reading-from-sqlite-db-to-pandas
sqlite3 flights.db
.schema airlines
CREATE TABLE airlines (
[index] INTEGER,
  [id] TEXT,
  [name] TEXT,
  [alias] TEXT,
  [iata] TEXT,
  [icao] TEXT,
  [callsign] TEXT,
  [country] TEXT,
  [active] TEXT
);
CREATE INDEX ix_airlines_index ON airlines ([index]);
airlines.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6048 entries, 0 to 6047
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        6048 non-null   object
 1   name      6048 non-null   object
 2   alias     5615 non-null   object
 3   iata      1461 non-null   object
 4   icao      5961 non-null   object
 5   callsign  5305 non-null   object
 6   country   6033 non-null   object
 7   active    6048 non-null   object
dtypes: object(8)
memory usage: 425.2+ KB
airlines.head()
id name alias iata icao callsign country active
index
0 1 Private flight \N - None None None Y
1 2 135 Airways \N None GNL GENERAL United States N
2 3 1Time Airline \N 1T RNX NEXTIME South Africa Y
3 4 2 Sqn No 1 Elementary Flying Training School \N None WYT None United Kingdom N
4 5 213 Flight Unit \N None TFU None Russia N

Airports

conn = sqlite3.connect('reading-from-sqlite-db-to-pandas/flights.db')
airports_string = '''SELECT * 
                     FROM airports'''
airports = pd.read_sql(airports_string,
                       conn,
                       index_col='index')
conn.close()
%%bash
cd reading-from-sqlite-db-to-pandas
sqlite3 flights.db
.schema airports
CREATE TABLE airports (
[index] INTEGER,
  [id] TEXT,
  [name] TEXT,
  [city] TEXT,
  [country] TEXT,
  [code] TEXT,
  [icao] TEXT,
  [latitude] TEXT,
  [longitude] TEXT,
  [altitude] TEXT,
  [offset] TEXT,
  [dst] TEXT,
  [timezone] TEXT
);
CREATE INDEX ix_airports_index ON airports ([index]);
airports.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 8107 entries, 0 to 8106
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         8107 non-null   object
 1   name       8107 non-null   object
 2   city       8107 non-null   object
 3   country    8107 non-null   object
 4   code       5880 non-null   object
 5   icao       8043 non-null   object
 6   latitude   8107 non-null   object
 7   longitude  8107 non-null   object
 8   altitude   8107 non-null   object
 9   offset     8107 non-null   object
 10  dst        8107 non-null   object
 11  timezone   8107 non-null   object
dtypes: object(12)
memory usage: 823.4+ KB
airports.head()
id name city country code icao latitude longitude altitude offset dst timezone
index
0 1 Goroka Goroka Papua New Guinea GKA AYGA -6.081689 145.391881 5282 10 U Pacific/Port_Moresby
1 2 Madang Madang Papua New Guinea MAG AYMD -5.207083 145.7887 20 10 U Pacific/Port_Moresby
2 3 Mount Hagen Mount Hagen Papua New Guinea HGU AYMH -5.826789 144.295861 5388 10 U Pacific/Port_Moresby
3 4 Nadzab Nadzab Papua New Guinea LAE AYNZ -6.569828 146.726242 239 10 U Pacific/Port_Moresby
4 5 Port Moresby Jacksons Intl Port Moresby Papua New Guinea POM AYPY -9.443383 147.22005 146 10 U Pacific/Port_Moresby

Routes

conn = sqlite3.connect('reading-from-sqlite-db-to-pandas/flights.db')
routes_string = '''SELECT * 
                   FROM routes'''
routes = pd.read_sql(routes_string,
                     conn,
                     index_col='index')
conn.close()
%%bash
cd reading-from-sqlite-db-to-pandas
sqlite3 flights.db
.schema routes
CREATE TABLE routes (
[index] INTEGER,
  [airline] TEXT,
  [airline_id] TEXT,
  [source] TEXT,
  [source_id] TEXT,
  [dest] TEXT,
  [dest_id] TEXT,
  [codeshare] TEXT,
  [stops] TEXT,
  [equipment] TEXT
);
CREATE INDEX ix_routes_index ON routes ([index]);
routes.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 67663 entries, 0 to 67662
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   airline     67663 non-null  object
 1   airline_id  67663 non-null  object
 2   source      67663 non-null  object
 3   source_id   67663 non-null  object
 4   dest        67663 non-null  object
 5   dest_id     67663 non-null  object
 6   codeshare   14597 non-null  object
 7   stops       67663 non-null  object
 8   equipment   67645 non-null  object
dtypes: object(9)
memory usage: 5.2+ MB
routes.head()
airline airline_id source source_id dest dest_id codeshare stops equipment
index
0 2B 410 AER 2965 KZN 2990 None 0 CR2
1 2B 410 ASF 2966 KZN 2990 None 0 CR2
2 2B 410 ASF 2966 MRV 2962 None 0 CR2
3 2B 410 CEK 2968 KZN 2990 None 0 CR2
4 2B 410 CEK 2968 OVB 4078 None 0 CR2