* ---------------------------------------------------------------------------
* Calculate person-days of observation (PDO)
*
* Purpose
*   For every residency episode, work out how many days the individual spent
*   at each single year of age inside a user-specified analysis window, then
*   aggregate by commune / age / gender and draw a population pyramid.
*
* Inputs (global macros that must be set by the caller before running)
*   smonth sday syear   start of the analysis window
*   emonth eday eyear   end of the analysis window
*   savefolder          folder that receives the output datasets
*   suffix              optional suffix appended to output dataset names
*
* Input datasets (read from c:\chililabdata\data)
*   observation, individual, individres
*
* Outputs
*   lastvisit.dta (working file in the data folder)
*   $savefolder\pdoIndividual, pdoby_commune_age$suffix,
*   pdoby_commune_agegrp$suffix, pdoby_agegrp$suffix
*   c:\chililabdata\data\cross\poppyramid<d><m><y>.wmf
* ---------------------------------------------------------------------------

clear
set memory 200M
cd c:\chililabdata\data

* Fail early with a clear message instead of a cryptic mdy() syntax error
* when one of the required global macros has not been set.
foreach g in smonth sday syear emonth eday eyear savefolder {
    if `"${`g'}"' == "" {
        display as error "global macro `g' must be set before running this do-file"
        exit 198
    }
}

* ---------------------------------------------------------------------------
* Create last visit records: one row per location holding its latest
* observation date.
* ---------------------------------------------------------------------------
use observation
sort locationid
egen lastdate = max(obs_de_date), by(locationid)
format lastdate %d
drop if !(obs_de_date == lastdate)
keep locationid obs_de_date
drop if locationid == ""
duplicates drop locationid, force
sort locationid
save lastvisit, replace

* ---------------------------------------------------------------------------
* Interval for which the person-days of observation should be computed.
* Could be any interval -- this is a user-specified option.
* ---------------------------------------------------------------------------
global startTime = mdy($smonth, $sday, $syear)
global endTime   = mdy($emonth, $eday, $eyear)

* The old-style merge below needs the using dataset sorted on the key.
use individual
sort individid
save, replace

use individres
drop res_episodeid res_seventtype res_sobserveid res_eeventtype res_eobserveid
sort individid res_sdate
duplicates drop individid res_sdate, force

* Bring the gender and birth date into the residency record
sort individid
merge individid using individual, nokeep keep(gender birthdate)
drop if _merge != 3
drop _merge

* A residency cannot start before the person was born
replace res_sdate = birthdate if res_sdate < birthdate

* Attach the last visit date of the location (lastvisit is built at the top
* of this file; the original note says lastobservation.do builds the same table)
sort locationid
merge locationid using lastvisit, nokeep keep(obs_de_date)
drop if _merge != 3
rename obs_de_date lastvisit
format lastvisit %d
drop _merge

* Open-ended residencies (missing end date, or the 1 Jan 3000 sentinel)
* are closed at the last visit to the location.
replace res_edate = mdy(1,1,3000) if (res_edate == .)
replace res_edate = lastvisit if res_edate == mdy(1,1,3000)

* Clip each residency to the analysis window and discard episodes that fall
* entirely outside it.
replace res_sdate = $startTime if $startTime > res_sdate
replace res_edate = $endTime if $endTime < res_edate
drop if res_edate < res_sdate

* Approximate age (in years) at the start and the end of the clipped episode
generate StartAge = (res_sdate - birthdate) / 365.25
generate EndAge   = (res_edate - birthdate) / 365.25

* years = number of single-year age groups the individual passes through
* during the residency
generate years = int(EndAge) - int(StartAge) + 1

* both conditions should not happen (but do)
drop if years == .
drop if years <= 0

* Tag every residency episode before expanding it.  Numbering the expanded
* rows within (individid res_sdate) is not safe: after the clipping above two
* episodes of the same person can share a start date, which would make their
* rows count as one long sequence and assign the wrong ages.
generate long episode = _n

* for every age group the individual goes through during the residency,
* create a new record
expand years

* create an age variable to allocate each record to one of the age groups
* (debug: brow if individid == "01010010101")
sort episode
quietly by episode : generate seqvar = _n
generate age = int(StartAge + seqvar - 1)

* this should not be necessary -- do it anyway
drop if age < 0
drop if age > 120

* ---------------------------------------------------------------------------
* Begin and end of the interval in which the individual has the age given by
* the record.  Convention (unchanged from the original intent): the interval
* for age a starts the day AFTER the a-th birthday anniversary and ends ON
* the (a+1)-th anniversary, so consecutive intervals neither overlap nor
* leave gaps.
*
* The anniversary is computed first and one day is added afterwards.  Adding
* 1 to the day argument inside mdy() yields a missing date for anyone born on
* the last day of a month.  People born on 29 February get 28 February as
* their anniversary in non-leap years; without this fallback mdy() returns
* missing, which silently lost days (begin) or stretched the interval to the
* residency end (end).
* ---------------------------------------------------------------------------
generate beginInterval = mdy(month(birthdate), day(birthdate), year(birthdate) + age)
replace  beginInterval = mdy(2, 28, year(birthdate) + age) ///
    if beginInterval == . & month(birthdate) == 2 & day(birthdate) == 29
replace  beginInterval = beginInterval + 1

generate endInterval = mdy(month(birthdate), day(birthdate), year(birthdate) + age + 1)
replace  endInterval = mdy(2, 28, year(birthdate) + age + 1) ///
    if endInterval == . & month(birthdate) == 2 & day(birthdate) == 29

* check this against the residency start date (sdate) and end date
replace beginInterval = res_sdate if beginInterval < res_sdate
replace endInterval   = res_edate if endInterval > res_edate

* Calculate the number of days that a person is the age of the record
generate days = endInterval - beginInterval + 1
format beginInterval %d
format endInterval %d

* StartAge/EndAge are approximations (division by 365.25), so a record at the
* edge of an episode can end up with an empty or inverted interval.  Such
* records carry no real exposure and must not be subtracted from the totals.
drop if days <= 0

* Could have part of the age in one residency record and the other part in
* another residency record, hence the sum per individual and age.
gen communeid = substr(locationid, 1, 2)
sort communeid individid age gender
collapse (sum) days, by(communeid individid age gender)

* This saves person-days for each individual
save "$savefolder\pdoIndividual", replace

collapse (sum) days, by(communeid age gender)

* Now create a table that calculates totals for each age and each gender.
gen years  = days / 365.25
gen male   = 0
gen female = 0
replace male   = years if gender == "1"
replace female = years if gender == "2"

recode age (min/0=1 "<1") (1/4=2 "1-4") (5/9=3 "5-9") (10/14=4 "10-14") ///
    (15/19=5 "15-19") (20/24=6 "20-24") (25/29=7 "25-29") (30/34=8 "30-34") ///
    (35/39=9 "35-39") (40/44=10 "40-44") (45/49=11 "45-49") (50/54=12 "50-54") ///
    (55/59=13 "55-59") (60/64=14 "60-64") (65/69=15 "65-69") (70/74=16 "70-74") ///
    (75/79=17 "75-79") (80/max=18 ">=80"), gen(agegrp)

* Person-years are truncated to whole years per commune/age/gender cell
* before being aggregated further.
replace male   = int(male)
replace female = int(female)

collapse (sum) male female, by(communeid agegrp age)
save "$savefolder\pdoby_commune_age$suffix", replace

collapse (sum) male female, by(communeid agegrp)
save "$savefolder\pdoby_commune_agegrp$suffix", replace

collapse (sum) male female, by(agegrp)
save "$savefolder\pdoby_agegrp$suffix", replace

* ---------------------------------------------------------------------------
* Population pyramid: each bar is the group's share (in percent) of the total
* of both sexes; males are plotted as negative values so they extend left.
* ---------------------------------------------------------------------------
egen totalmale   = sum(male)
egen totalfemale = sum(female)
gen totalage = totalmale + totalfemale
gen pmale   = -100 * male / totalage
gen pfemale =  100 * female / totalage

global hyphen   = "/"
global refday   = day($startTime)
global refmonth = month($startTime)
global refyear  = year($startTime)

* ylabel covers all 18 age groups created by the recode above; the x axis is
* titled as a percentage because pmale/pfemale are percentages, not counts.
twoway bar pmale agegrp, horizontal xvarlab(Males) ///
    || bar pfemale agegrp, horizontal xvarlab(Females) ///
    || , ylabel(1(1)18, angle(horizontal) valuelabel labsize(*.8)) ///
         xtitle("Percent of total population") ///
         ytitle("Age group") ///
         xlabel(-10 "10" -7.5 "7.5" -5 "5" -2.5 "2.5" 2.5 5 7.5 10) ///
         legend(label(1 Males) label(2 Females)) ///
         title("Chililab Male and Female Population by Age") ///
         subtitle($refday$hyphen$refmonth$hyphen$refyear) ///
         note("Source: Chililab DSS Database", span)

* graph export writes a real Windows Metafile; graph save would write a Stata
* .gph file regardless of the .wmf extension.
graph export "c:\chililabdata\data\cross\poppyramid$refday$refmonth$refyear.wmf", replace