* ---------------------------------------------------------------------------
* Person-days of observation (PDO): data preparation.
* Builds one "last visit" date per location, then clips every residency
* episode to the analysis window and works out how many single-year ages the
* episode spans. The global curryear (analysis year) is read but not set here.
* ---------------------------------------------------------------------------
clear
set memory 500M
cd e:\chililabdata\data

* --- Last visit per location ------------------------------------------------
* Keep a single row per non-empty locationid carrying the latest obs_de_date.
use observation
bysort locationid: egen lastdate = max(obs_de_date)
format lastdate %d
keep if obs_de_date == lastdate
keep locationid obs_de_date
keep if locationid != ""
duplicates drop locationid, force
sort locationid
save lastvisit, replace

* --- Analysis window and output folder --------------------------------------
* The window could be any interval; here it is the calendar year $curryear.
global startTime = mdy(1, 1,$curryear)
global endTime = mdy(12, 31,$curryear)
global savefolder = "e:\pdo"

* Re-save the individual file sorted by individid so it can be the "using"
* side of the sorted merge further down.
use individual
sort individid
save, replace

* --- Residency episodes -----------------------------------------------------
use individres
drop res_episodeid res_seventtype res_sobserveid res_eeventtype res_eobserveid
sort individid res_sdate
duplicates drop individid res_sdate, force

* Attach gender and birth date to each episode; keep matched episodes only.
sort individid
merge individid using individual, nokeep keep(gender birthdate)
keep if _merge == 3
drop _merge

* An episode cannot start before the person was born.
replace res_sdate = birthdate if birthdate > res_sdate

* Attach the last visit date of the episode's location (the lastvisit file
* saved above); keep matched episodes only.
sort locationid
merge locationid using lastvisit, nokeep keep (obs_de_date)
keep if _merge == 3
rename obs_de_date lastvisit
format lastvisit %d
drop _merge

* Open-ended episodes (missing end date, or an end date of 1 Jan 3000) are
* closed at the last visit to the location.
replace res_edate = mdy(1,1,3000) if (res_edate == .)
replace res_edate = lastvisit if res_edate == mdy(1,1,3000)

* Clip the episode to the analysis window and discard episodes that end
* before they start once clipped.
replace res_sdate = $startTime if res_sdate < $startTime
replace res_edate = $endTime if res_edate > $endTime
keep if res_edate >= res_sdate

* Age (in years, fractional) at the start and at the end of the clipped episode.
generate StartAge = (res_sdate - birthdate)/ 365.25
generate EndAge = (res_edate - birthdate)/ 365.25

* Number of single-year ages the individual passes through during the episode.
generate years = int(EndAge) - int(StartAge) + 1

* A missing count should not happen (but does).
drop if years == .
* ---------------------------------------------------------------------------
* Person-days of observation: split every residency episode into one record
* per single year of age, clip each age interval to the episode, count the
* days, classify by area and age group, save the detail file and total the
* days by commune, age group, age and gender.
* ---------------------------------------------------------------------------

* A non-positive count of ages should not happen (but does).
drop if years <= 0

* For every age the individual goes through during the episode, create a record.
* The copies are numbered within a record id taken BEFORE -expand-: res_sdate
* has already been clipped (to the birth date and to the window start), so two
* episodes of one individual can share the same start date, and numbering
* within (individid, res_sdate) alone would run the ages of both episodes
* together.
tempvar episode annivStart annivEnd
generate long `episode' = _n
expand years
*drop years

* Allocate each record to one of the ages.
* brow if individid =="01010010101"
sort individid res_sdate `episode'
quietly by individid res_sdate `episode': gen seqvar = _n
drop `episode'
generate age = int(StartAge + seqvar - 1)
* drop seqvar

* this should not be necessary -- do it anyway
drop if age < 0 | age > 120

* Begin and end of the interval during which the individual is the age given
* by the record. Convention (unchanged): the interval runs from the day AFTER
* the birthday anniversary that starts the age through the next anniversary,
* so the birthday itself is counted with the younger age.
*
* The day after the anniversary is obtained by date arithmetic (anniversary + 1)
* rather than mdy(month, day + 1, year): the latter is missing whenever the
* birthday is the last day of a month, and a missing beginInterval made days
* missing, so that person-time silently vanished from the totals.
generate `annivStart' = mdy(month(birthdate), day(birthdate), year(birthdate) + age)
generate `annivEnd'   = mdy(month(birthdate), day(birthdate), year(birthdate) + age + 1)

* 29 Feb has no anniversary in a non-leap year; use 28 Feb there. Without this
* a missing endInterval compares greater than res_edate and would be replaced
* by the end of the whole episode.
replace `annivStart' = mdy(2, 28, year(birthdate) + age) ///
    if `annivStart' == . & month(birthdate) == 2 & day(birthdate) == 29
replace `annivEnd' = mdy(2, 28, year(birthdate) + age + 1) ///
    if `annivEnd' == . & month(birthdate) == 2 & day(birthdate) == 29

generate beginInterval = `annivStart' + 1
generate endInterval = `annivEnd'
drop `annivStart' `annivEnd'

* Clip the age interval to the residency start and end dates.
replace beginInterval = res_sdate if beginInterval < res_sdate
replace endInterval = res_edate if endInterval > res_edate

* Number of days the person spends at the age of the record (both ends inclusive).
generate days = endInterval - beginInterval + 1
format beginInterval endInterval %d
* drop if days < = 0
* Could have part of the age in one residency record and the other part in another residency
* brow if individid =="01010010101"

* Commune = first two characters of the location id; communes 02, 04, 05 and
* 07 form area 2, every other commune is area 1.
gen communeid = substr(locationid, 1,2)
gen area = cond(inlist(communeid, "02", "04", "05", "07"), 2, 1)

* Standard age groups: <1, 1-4, then five-year groups up to an open 85+ group.
recode age (min/0=1 "<1") (1/4=2 "1-4") (5/9=3 "5-9")(10/14=4 "10-14") ///
    (15/19=5 "15-19") (20/24=6 "20-24") (25/29=7 "25-29") (30/34=8 "30-34") ///
    (35/39=9 "35-39") (40/44=10 "40-44") (45/49=11 "45-49")(50/54=12 "50-54") ///
    (55/59=13 "55-59") (60/64=14 "60-64") (65/69=15 "65-69") (70/74=16 "70-74") ///
    (75/79=17 "75-79") (80/84=18 "80-84") (85/max=19 ">=85") , gen (agegrp)

* NOTE(review): "$suffix_" most likely expands a global named "suffix_" (the
* underscore is a legal macro-name character), not "suffix" followed by "_".
* If a global "suffix" is what was meant, this should read ${suffix}_${curryear};
* left unchanged because it would rename every output file -- confirm with the caller.
save $savefolder\pdobydetail$suffix_$curryear, replace

* Total days by commune, age group, single year of age and gender.
sort communeid agegrp age gender
collapse (sum) days, by (communeid agegrp age gender)
* Now create a table that calculates totals for each age and each gender.
* ---------------------------------------------------------------------------
* Person-years tables, first by commune and then by area.
* On entry the data in memory hold total days by communeid, agegrp, age and
* gender.
* ---------------------------------------------------------------------------

* --- Commune tables ---------------------------------------------------------
* Convert days to years and put them in one column per gender code
* ("1" -> male, "2" -> female), truncated to whole years within each cell.
gen years = days/365.25
gen male = cond(gender == "1", int(years), 0)
gen female = cond(gender == "2", int(years), 0)

* commune x age group x single year of age
collapse (sum) male female, by(communeid agegrp age)
save $savefolder\pdoby_commune_age$suffix_$curryear, replace

* commune x age group
collapse (sum) male female, by(communeid agegrp)
save $savefolder\pdoby_commune_agegrp$suffix_$curryear, replace

* age group only
collapse (sum) male female, by(agegrp)
save $savefolder\pdoby_agegrp$suffix_$curryear, replace

* --- Area tables ------------------------------------------------------------
* Start again from the detail file and total the days by area instead.
use $savefolder\pdobydetail$suffix_$curryear, clear
sort area agegrp age gender
collapse (sum) days, by(area agegrp age gender)

* Same conversion as for the commune tables.
gen years = days/365.25
gen male = cond(gender == "1", int(years), 0)
gen female = cond(gender == "2", int(years), 0)

* area x age group x single year of age
collapse (sum) male female, by(area agegrp age)
save $savefolder\pdoby_area_age$suffix_$curryear, replace

* area x age group
collapse (sum) male female, by(area agegrp)
save $savefolder\pdoby_area_agegrp$suffix_$curryear, replace

* age group only -- this overwrites the file of the same name written by the
* commune pass above. NOTE(review): the two versions can differ slightly
* because the truncation to whole years is applied to different cells.
collapse (sum) male female, by(agegrp)
save $savefolder\pdoby_agegrp$suffix_$curryear, replace