"""Flag consecutive repeated activities per customer in an event log.

Reads ``INPUT.csv``, sorts it by ``CustomerID`` and ``startTimestamp``, adds a
boolean ``isRepetition`` column and writes the result to ``OUTPUT.csv``.
"""
import csv

import pandas as pd


def compute_repetition_flags(df):
    """Return one boolean per row of *df*, in row order.

    A row is flagged ``True`` when the row directly before it has the same
    ``CustomerID`` and the same ``Activity`` + ``service_detail_EN`` text.
    Values are compared as whitespace-stripped strings, so a missing value
    compares as the text ``"nan"``.  The first row is always ``False`` and an
    empty frame yields an empty list.
    """
    # Only the three compared columns are walked; this avoids building a
    # Series for every row the way DataFrame.iterrows() does.
    keys = [
        (
            str(customer).strip(),
            # Activity and detail are joined into a single string before
            # comparing, so the boundary between the two fields is not part
            # of the comparison.
            str(activity).strip() + str(detail).strip(),
        )
        for customer, activity, detail in zip(
            df["CustomerID"], df["Activity"], df["service_detail_EN"]
        )
    ]
    # Pair every key with its predecessor.  The leading None can never equal
    # a tuple, so the first row comes out False, and zip() yields nothing at
    # all for an empty frame.
    previous_keys = [None] + keys[:-1]
    return [current == previous for previous, current in zip(previous_keys, keys)]


def _read_events(path):
    """Load the CSV at *path*, skipping malformed lines with a warning."""
    options = dict(sep=",", encoding="utf-8", low_memory=False)
    try:
        # pandas >= 1.3 spelling of "skip bad lines but warn about them".
        return pd.read_csv(path, on_bad_lines="warn", **options)
    except TypeError:
        # Older pandas does not accept on_bad_lines; use the legacy keyword.
        return pd.read_csv(path, error_bad_lines=False, **options)


def main(input_path="INPUT.csv", output_path="OUTPUT.csv"):
    """Read the events, add the ``isRepetition`` column and save the result."""
    df = _read_events(input_path)

    # Order the rows so that consecutive rows of one customer are adjacent
    # and chronological.
    df = df.sort_values(by=["CustomerID", "startTimestamp"], ascending=[True, True])

    # A plain list is assigned positionally, so it lines up with the sorted
    # rows regardless of the (now shuffled) index labels.
    df["isRepetition"] = compute_repetition_flags(df)

    df.to_csv(output_path, quoting=csv.QUOTE_ALL, encoding="utf-8")


if __name__ == "__main__":
    main()