CCN data analysis

Posted on Jul 22, 2022

Combine CCN data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import os
from pandas.plotting import scatter_matrix
from scipy import stats, integrate
import seaborn as sns
from pylab import savefig
%matplotlib inline
# Switch the working directory; every file below is looked up relative to it.
os.chdir(r'XXX')
# Keep only the directory entries whose name begins with 'CCN 100 data'.
Filenames = [
    entry for entry in os.listdir('.')
    if entry.startswith('CCN 100 data')
]

def Getfile(fnombre):
    """Load one CCN CSV file and return its data table with a 'Datetime' column.

    The table header is taken from the fifth line of the file (the first four
    lines are skipped).  The date is read from the second column of the first
    row that follows the file's opening line, and is combined with every
    row's 'Time' value into a single timestamp.

    Parameters
    ----------
    fnombre : path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The data table with space-stripped column names, a 'Datetime' column
        appended, and the 'Date' and 'Time' columns removed.
    """
    table = pd.read_csv(fnombre, skiprows=4)
    preamble = pd.read_csv(fnombre, nrows=2)
    table['Date'] = preamble.iloc[0, 1]
    # Header names may carry padding spaces; strip them before column lookups.
    table.columns = [label.strip(' ') for label in table.columns]
    date_text = table['Date'].astype('str')
    time_text = table['Time'].astype('str')
    table['Datetime'] = pd.to_datetime(date_text + ' ' + time_text)
    return table.drop(columns=['Date', 'Time'])
# Read every collected file and stack the tables into a single frame.
df_list = list(map(Getfile, Filenames))
big_df = pd.concat(df_list).rename(columns={'Datetime': 'Time'})

# Format the timestamps as text and parse them back once they are the index;
# the format string keeps whole seconds only.
time_text = big_df['Time'].dt.strftime('%Y-%m-%d %H:%M:%S')
big_df = big_df.assign(Time=time_text).set_index('Time')
big_df.index = pd.to_datetime(big_df.index)

# Order the rows by time and save the merged table.
big_df = big_df.sort_index()
big_df.to_csv('CCNdata.csv', encoding='utf-8')

Dropping the first 5 minutes of data after each supersaturation change

import time, datetime
from datetime import datetime, timedelta
# Work on an explicit copy: the helper columns below are then added to an
# independent frame rather than to a selection of big_df, which avoids
# pandas' SettingWithCopyWarning.
dfa = big_df[['Current SS', 'CCN Number Conc']].copy()
dfa['SS'] = dfa['Current SS'].shift(+1)          # SS value of the previous row
dfa['SS_Delta'] = dfa['Current SS'] - dfa['SS']

# Rows where the supersaturation (SS) differs from the previous row.  The
# first row has no predecessor, so its delta is NaN and it counts as a change.
dfac = dfa[dfa['SS_Delta'] != 0]

# Flag every row inside the closed window [change time, change time + 5 min].
# One accumulated mask replaces re-filtering the whole frame per change.
settling_period = timedelta(minutes=5)
in_settling_window = np.zeros(len(dfa), dtype=bool)
for change_time in dfac.index:
    in_settling_window |= (
        (dfa.index >= change_time)
        & (dfa.index <= change_time + settling_period)
    )
dfad = dfa.loc[~in_settling_window, ['Current SS', 'CCN Number Conc']].copy()

# One column per SS value, indexed by time; built once and reused for both
# output resolutions.
ccn_by_ss = dfad.reset_index().pivot_table(
    index='Time', columns='Current SS', aggfunc='mean'
)['CCN Number Conc']

ccn_by_ss.resample('5min').mean().to_csv('CCN_5min.csv')
# NOTE(review): recent pandas releases deprecate the 'H' alias in favour of
# 'h' -- confirm against the installed version before changing it.
ccn_by_ss.resample('1H').mean().to_csv('CCN_1H.csv')