Technology Sharing

Pandas pywin32 operation excel office automation

2024-07-12

한어Русский языкEnglishFrançaisIndonesianSanskrit日本語DeutschPortuguêsΕλληνικάespañolItalianoSuomalainenLatina

import pandas as pd
import re
import win32com.client as win32
from win32com.client import constants
import os
import os.path as osp
 
#Read the table
path=os.getcwd()
fp=osp.join(path,'fuck_demo.xlsx')
df=pd.read_excel(fp,header=1,usecols=['Serial number','Cable section name (end A-end B)'],nrows=72)#Don't forget nrows
df.set_index('Serial number', inplace=True)#Set index
 
#Filter data
df_1=df[~df['Optical cable section name (end A-end B)'].str.contains('JT01')]
df_2=df[df['Optical cable section name (end A-end B)'].str.contains('JT01')]
 
#Operation reference df_2 data
reg=re.compile(r'(?<=()(.*?)(?=))')
pf=df_2.copy()
pf['Extract AB end'] = pf['Optical cable section name (A end - B end)'].str.extract(reg,expand=False)
pf['Number of optical cable cores']=pf['Name of optical cable section (end A-end B)'].str.extract(r'(?:.*)(-d+)',expand=False)
pf=pf.iloc[::-1]#Reverse order, key
pf['Number of optical cable cores'] = pf['Number of optical cable cores'].replace('-', '', regex=True)#Replace
 
 
#Data processing
ls=[]
for idx,row in pf.iterrows():
strs_2=row['Optical cable section name (end A-end B)']
strs_3=row['Extract AB end']
strs_4=row['Number of optical cable cores']
    if strs_3 in strs_2:
        ls.append([idx,strs_3,strs_4])
 
 
lg=[]#Store the current item equal to the previous item, but not equal to the next item
lf=[]#Store the two items that are not equal
for i in range(len(ls)):
    if ls[i][1]==ls[i-1][1] and ls[i][1]!=ls[i+1][1]:
        lt=ls[i-1:i+2]
l1=[]#Store numbers
l2=[]#Store AB end
l3=[]#Store the number of optical cable cores
        for j in range(len(lt)):
            l1.append(lt[j][0])
            l2.append(lt[j][1])
            l3.append(lt[j][-1])
 
new_l1=[min(l1),max(l1)]#Take the minimum and maximum
new_l1=map(str,new_l1)#List elements are mapped to strings
        new_l2=list(set(l2))+list(set(l3))
res_1='-'.join(new_l1)#Serial number splicing
res_2='-'.join(new_l2)# AB end optical cable core number splicing
res_3=list((res_1,res_2))#Convert to list
        lg.append(res_3)
    else:
l1=ls[i][0]#Store numbers
l2=ls[i][1]#Store AB end
l3=ls[i][-1]#Store the number of optical cable cores
        res_4=list((l1,l2,l3))
        lf.append(res_4)
 
 
 
xh=[]#Serial number
ab=[]#ab end
xs=[]#Number of cores
for k in range(len(lf)):
l1=lf[k][0]#Store numbers
l2=lf[k][1]#Store AB end
l3=lf[k][-1]#Store the number of optical cable cores
    xh.append(l1)
    ab.append(l2)
    xs.append(l3)
 
# Regroup in pairs
xh=[xh[x:x+2] for x in range(0,len(xh),2)]
ab=[ab[x:x+2] for x in range(0,len(ab),2)]
xs=[xs[x:x+2] for x in range(0,len(xs),2)]
 
lm=[]
for n in range(len(xh)):
    l1=list((min(xh[n]),max(xh[n])))
    new_l1=map(str,l1)
    l2=list(set(xs[n]))
    l3=ab[n]+l2
 
    res_1='-'.join(new_l1)
    res_2='-'.join(l3)
    lm.append([res_1,res_2])
 
 
finall_list=lg+lm
 
#Deduplication
finall_ls=[]
for l in finall_list:
    if l not in finall_ls:
        finall_ls.append(l)
 
#Recreate the DataFrame based on the result list
df_3=pd.DataFrame(data=finall_ls,columns=['Serial number','Optical cable section name (end A-end B)'])
df_3.set_index('Serial number',drop=True,inplace=True)#Set the serial number column as the index
 
 
#Splice the cable segment without JT and the processed segment with JT
cf=pd.concat([df_1,df_3],axis=0)
 
 
'''
You can replace the index or not. Anyway, the row number is based on the sequence number + 2
'''
 
#pywin32 read excel table
#Run the excel program
try:
    excel_app = win32.gencache.EnsureDispatch('Excel.Application')
except:
    try:
        win32.gencache.EnsureDispatch('et.Application')
    except:
        win32.gencache.EnsureDispatch('ket.Application')
finally:
    excel_app.Visible = True
    excel_app.DisplayAlerts = False
#Read the required columns in the table
workbook = excel_app.Workbooks.Open(fp)
worksheet = workbook.Worksheets('list')
worksheet.Columns(3).Insert()
worksheet.Columns(4).Insert()
worksheet.Cells(2, 3).Value = 'Drawing number'#Set the column name
worksheet.Cells(2, 4).Value = 'Paragraph Name'#Set the column name.
 
#Operation table
for idx,row in cf.iterrows():
dl=row['Optical cable section name (end A-end B)']
    bh=idx 
    if '-' in str(bh):
        sb=bh.split('-')
start_row=int(min(sb))+2#starting row number
end_row=int(max(sb))+2#end row number
range1_to_merge = worksheet.Range(worksheet.Cells(start_row, 3), worksheet.Cells(end_row, 3))#"Sheet number" of the cell to be merged
range2_to_merge = worksheet.Range(worksheet.Cells(start_row, 4), worksheet.Cells(end_row, 4))#"Paragraph Name" of the cells to be merged
range3_to_color = worksheet.Range(worksheet.Cells(start_row, 5), worksheet.Cells(end_row, 5))#"Optical cable segment name (end A-end B)" to set the color
range4_to_color = worksheet.Range(worksheet.Cells(start_row, 17), worksheet.Cells(end_row, 17))#"Order number + requester" to set the color
range2_to_merge.WrapText = True#Automatic line break
range3_to_color.Interior.Color =7396243 # Light red fill color 804040
        range4_to_color.Interior.Color =7396243
#range3_to_merge.Font.Color = 25500 # Dark red text
range1_to_merge.Value=bh#Drawing number
range2_to_merge.Value=dl#Optical cable section
        range1_to_merge.Merge()     
        range2_to_merge.Merge()
        range2_to_merge.Rows.RowHeight=36
        range2_to_merge.Columns.ColumnWidth = 34
    else:
        worksheet.Cells(idx+2,3).Value = idx
        worksheet.Cells(idx+2,4).Value = dl 
worksheet.Cells(idx+2,4).WrapText = True#Automatic line break
 
 
#End the operation and save the table
workbook.Save()
workbook.Close(SaveChanges=False)
excel_app.Quit()