Manipulating Data in Salesforce Orgs with Python

22 Jan 2021

from simple_salesforce import Salesforce
import pandas as pd
pd.options.display.max_rows = 999

sf = Salesforce(username='[email protected]',
                password='',
                security_token='ga1r7amRLtuKhxuHW8qkXMZ6')

First, define a function to programmatically obtain all data on the Contacts object

def pull_contacts():
    desc = sf.Contact.describe()
    fields = []
    field_labels = [field['label'] for field in desc['fields']]
    field_names =  [field['name']  for field in desc['fields']]
    field_types =  [field['type']  for field in desc['fields']]
    for label, name, dtype in zip(field_labels, field_names, field_types):
        fields.append((label, name, dtype))
    fields = pd.DataFrame(fields,
                          columns = ['label','name','type'])
    obj = 'Contact'
    field_list = ', '.join(list(fields['name']))
    query = 'SELECT ' + field_list + '''
    FROM ''' + obj
    q = sf.query_all(query)
    c = (pd.DataFrame(q['records'])
         .drop(columns=['attributes']))
    return c

c = pull_contacts()
c = c[['Id','Name','MobilePhone','Email','Department']].copy()
c.info()
c.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Id           20 non-null     object
 1   Name         20 non-null     object
 2   MobilePhone  18 non-null     object
 3   Email        18 non-null     object
 4   Department   17 non-null     object
dtypes: object(5)
memory usage: 928.0+ bytes

	Id	Name	MobilePhone	Email	Department
0	0034x0000072sLDAAY	Stella Pavlova	(111) 111-1111	[email protected]	Production
1	0034x0000072sLPAAY	Jake Llorrac	None	None	None
2	0034x0000072sLIAAY	Arthur Song	(222) 222-2222	[email protected]	Executive Team
3	0034x0000072sLOAAY	Siddartha Nedaerk	(555) 555-5555	None	None
4	0034x0000072sLAAAY	Andy Young	(333) 333-3333	[email protected]	Internal Operations

c = pull_contacts()

Update

Imagine there is separate data with updated mobile phone numbers for several of the records.

updated_phones = pd.DataFrame([('Stella Pavlova',    '(111) 111-1111'),
                               ('Arthur Song',       '(222) 222-2222'),
                               ('Andy Young',        '(333) 333-3333'),
                               ("Liz D'Cruz",        '(444) 444-4444'),
                               ('Siddartha Nedaerk', '(555) 555-5555')],
                              columns=['Name', 'Updated_MobilePhone'])
updated_phones

	Name	Updated_MobilePhone
0	Stella Pavlova	(111) 111-1111
1	Arthur Song	(222) 222-2222
2	Andy Young	(333) 333-3333
3	Liz D'Cruz	(444) 444-4444
4	Siddartha Nedaerk	(555) 555-5555

m = pd.merge(c, updated_phones,
             on=['Name'],
             how='left')
print(m.shape)
m = (m
     .dropna(subset=['Updated_MobilePhone'])
     .reset_index(drop=True))
m['MobilePhone'] = m['Updated_MobilePhone']
print(m.shape)
m = m[['Id','Name','MobilePhone','Email','Department']].copy()
m.head()

(20, 64)
(5, 64)

	Id	Name	MobilePhone	Email	Department
0	0034x0000072sLDAAY	Stella Pavlova	(111) 111-1111	[email protected]	Production
1	0034x0000072sLIAAY	Arthur Song	(222) 222-2222	[email protected]	Executive Team
2	0034x0000072sLOAAY	Siddartha Nedaerk	(555) 555-5555	None	None
3	0034x0000072sLAAAY	Andy Young	(333) 333-3333	[email protected]	Internal Operations
4	0034x0000072sLLAAY	Liz D'Cruz	(444) 444-4444	[email protected]	Production

Write just the necessary columns for update to a list of dictionaries from Pandas.

updated_phones = m[['Id','MobilePhone']].to_dict('records')
updated_phones

[{'Id': '0034x0000072sLDAAY', 'MobilePhone': '(111) 111-1111'},
 {'Id': '0034x0000072sLIAAY', 'MobilePhone': '(222) 222-2222'},
 {'Id': '0034x0000072sLOAAY', 'MobilePhone': '(555) 555-5555'},
 {'Id': '0034x0000072sLAAAY', 'MobilePhone': '(333) 333-3333'},
 {'Id': '0034x0000072sLLAAY', 'MobilePhone': '(444) 444-4444'}]

Use Bulk API functions via SimpleSalesforce. Capture returned results as a DataFrame.

update_results = pd.DataFrame(sf.bulk.Contact.update(updated_phones,
                                                     batch_size=200,
                                                     use_serial=True))
update_results

	success	created	id	errors
0	True	False	0034x0000072sLDAAY	[]
1	True	False	0034x0000072sLIAAY	[]
2	True	False	0034x0000072sLOAAY	[]
3	True	False	0034x0000072sLAAAY	[]
4	True	False	0034x0000072sLLAAY	[]

c = pull_contacts()
c = c[['Id','Name','MobilePhone','Email','Department']].copy()
c.head()

	Id	Name	MobilePhone	Email	Department
0	0034x0000072sLDAAY	Stella Pavlova	(111) 111-1111	[email protected]	Production
1	0034x0000072sLPAAY	Jake Llorrac	None	None	None
2	0034x0000072sLIAAY	Arthur Song	(222) 222-2222	[email protected]	Executive Team
3	0034x0000072sLOAAY	Siddartha Nedaerk	(555) 555-5555	None	None
4	0034x0000072sLAAAY	Andy Young	(333) 333-3333	[email protected]	Internal Operations

Insert

Imagine there are a few new records that need to be added.

new_records = [
    {'LastName':'Doe', 'FirstName':'John', 'Email':'[email protected]', 'MobilePhone':'(666) 666-6666'},
    {'LastName':'Doe', 'FirstName':'Jane', 'Email':'[email protected]', 'MobilePhone':'(777) 777-7777'},
]

insert_results = pd.DataFrame(sf.bulk.Contact.insert(new_records,
                                                     batch_size=200,
                                                     use_serial=True))
insert_results

	success	created	id	errors
0	True	True	0034x000006txd2AAA	[]
1	True	True	0034x000006txd3AAA	[]

new_ids = insert_results['id'].values
new_ids

array(['0034x000006txd2AAA', '0034x000006txd3AAA'], dtype=object)

c = pull_contacts()
print(str(c.shape[0]) + ' Records, ' + str(c.shape[1]) + ' Fields')
c = (c[c['Id'].isin(new_ids)]
     [['Id','LastName','FirstName','Email']]
     .reset_index(drop=True))
c.head()

22 Records, 63 Fields

	Id	LastName	FirstName	Email
0	0034x000006txd3AAA	Doe	Jane	[email protected]
1	0034x000006txd2AAA	Doe	John	[email protected]

Delete

Delete the records just inserted.

delete_ids = c[['Id']].to_dict('records')
delete_ids

[{'Id': '0034x000006txd3AAA'}, {'Id': '0034x000006txd2AAA'}]

delete_results = pd.DataFrame(sf.bulk.Contact.delete(delete_ids,
                                                     batch_size=200,
                                                     use_serial=True))
delete_results

	success	created	id	errors
0	True	False	0034x000006txd3AAA	[]
1	True	False	0034x000006txd2AAA	[]

c = pull_contacts()
print(str(c.shape[0]) + ' Records, ' + str(c.shape[1]) + ' Fields')

20 Records, 63 Fields