Wednesday, July 29, 2009

SAS: How to Get ParameterEstimates into a SAS Data set

/* How to Get ParameterEstimates into a Data set */
data test;
input x1 x2 y;
datalines;
12 23 34
23 34 34
34 23 34
;
run;
ods trace on;
proc reg data=test;
model y= x1 x2;
ods output ParameterEstimates=ParmEst;
run;
ods trace off;
/* Name of the Data set you will get it from Log file - Example is ParameterEstimates */
Output Added:
-------------
Name: ParameterEstimates
Label: Parameter Estimates
Template: Stat.REG.ParameterEstimates
Path: Reg.MODEL1.Fit.y.ParameterEstimates
-------------

Friday, July 10, 2009

Statistics: TURF Analysis

TURF Analysis, an acronym for "Total Unduplicated Reach and Frequency", is a type of statistical analysis used for providing estimates of media or market potential and devising optimal ways how to use it given the limited resources.

TURF is a statistical model that can used to answer questions like :
1) Where should we place ads to reach the widest possible audience?
2) What kind of market-share will we gain if we add a new line to our model?

http://en.wikipedia.org/wiki/TURF_Analysis
http://www.questionpro.com/akira/showArticle.do?articleID=turf01

Excel VBA: Loops Syntax

Sub ForLoop()
Dim I As Long

For I = 1 To 5 Step 1
MsgBox I
Next I

For I = 5 To 1 Step -1
MsgBox I
Next I

End Sub
---------------------------------



Sub DoLoop1()

Dim I As Long

I = 0
Do While I < 0
MsgBox I
I = I + 1
Loop

End Sub
---------------------------------



Sub DoLoop2()

Dim I As Long

I = 0
Do
MsgBox I
I = I + 1
Loop While I < 0

End Sub

---------------------------------


Sub DoLoop3()
Dim I As Long

I = 0
Do Until I > 5
MsgBox I
I = I + 1
Loop

End Sub

---------------------------------


Sub DoLoop4()
Dim I As Long

I = 0
Do
MsgBox I
I = I + 1
Loop Until I > 5

End Sub

Excel VBA: Search And Replace in different files

Sub SearchAndReplace()
Dim oFSO As New FileSystemObject
Dim oFileIn As TextStream
Dim oFileOut As TextStream
Dim sLine As String

If Not oFSO.FileExists("C:\Test\Try04.txt") Then
MsgBox "File Not Found", vbCritical
Exit Sub
End If

Set oFileIn = oFSO.OpenTextFile("C:\Test\Try04.txt")
Set oFileOut = oFSO.CreateTextFile("C:\Test\Try05.txt")

Do While Not oFileIn.AtEndOfStream
sLine = oFileIn.ReadLine
sLine = Replace(sLine, "4", "X")

'sLine = Replace(oFileIn.ReadLine, "4", "X")

oFileOut.WriteLine sLine

Loop
oFileIn.Close
oFileOut.Close

Set oFileIn = Nothing
Set oFileOut = Nothing
Set oFSO = Nothing

MsgBox "Over", vbInformation
End Sub

Excel VBA: How to Create a Word Table

Sub CreateWordTable()
Dim oWORD As Word.Application
Dim oDOC As Word.Document
Dim oTable As Word.Table

Dim oWS As Worksheet
Dim lMaxRow As Long
Dim lRow As Long
Dim lMaxCol As Long
Dim lCol As Long

Set oWS = Worksheets(1)
lMaxRow = oWS.Cells.SpecialCells(xlCellTypeLastCell).Row
lMaxCol = oWS.Cells.SpecialCells(xlCellTypeLastCell).Column

Set oWORD = New Word.Application
oWORD.Visible = True
Set oDOC = oWORD.Documents.Add
Set oTable = oDOC.Tables.Add(oWORD.Selection.Range, lMaxRow, lMaxCol)
For lRow = 1 To lMaxRow Step 1
For lCol = 1 To lMaxCol Step 1
oTable.Rows(lRow).Cells(lCol).Range.Text = oWS.Cells(lRow, lCol)
Next lCol
Next lRow

Set oTable = Nothing
oDOC.Save
oDOC.Close
Set oDOC = Nothing
oWORD.Application.Quit
Set oWORD = Nothing
End Sub

Excel VBA: How to Create a PowerPointTable

Sub CreatePowerPointTable()
Dim oPPTApp As PowerPoint.Application
Dim oPPT As PowerPoint.Presentation
Dim oSlide As PowerPoint.Slide
Dim oTable As PowerPoint.Table

Dim oWS As Worksheet
Dim lMaxRow As Long
Dim lRow As Long
Dim lMaxCol As Long
Dim lCol As Long

Set oWS = Worksheets(1)
lMaxRow = oWS.Cells.SpecialCells(xlCellTypeLastCell).Row
lMaxCol = oWS.Cells.SpecialCells(xlCellTypeLastCell).Column

Set oPPTApp = New PowerPoint.Application
oPPTApp.Visible = True
Set oPPT = oPPTApp.Presentations.Add
Set oSlide = oPPT.Slides.Add(1, ppLayoutBlank)
Set oTable = oSlide.Shapes.AddTable(lMaxRow, lMaxCol).Table
For lRow = 1 To lMaxRow Step 1
For lCol = 1 To lMaxCol Step 1
oTable.Rows(lRow).Cells(lCol).Shape.TextFrame.TextRange.Text = oWS.Cells(lRow, lCol)
Next lCol
Next lRow

Set oTable = Nothing
Set oSlide = Nothing
oPPT.Save
oPPT.Close
Set oPPT = Nothing
oPPTApp.Quit
Set oPPTApp = Nothing
End Sub

Excel VBA: How to Create a Text File

Sub CreateTXTFile()
Dim oFSO As New FileSystemObject
Dim oTS As TextStream
Dim oWS As Worksheet
Dim lMaxRow As Long
Dim lRow As Long
Dim lMaxCol As Long
Dim lCol As Long
Dim lColWidths(1 To 3) As Long
Dim sLine As String
Dim sData As String
Dim lStart As Long
lColWidths(1) = 30
lColWidths(2) = 20
lColWidths(3) = 5

Set oWS = Worksheets(1)
lMaxRow = oWS.Cells.SpecialCells(xlCellTypeLastCell).Row
lMaxCol = oWS.Cells.SpecialCells(xlCellTypeLastCell).Column

Set oTS = oFSO.CreateTextFile("C:\Test\Map.csv")
oTS.WriteLine "Variable,Start,Width"
lStart = 1
For lCol = 1 To lMaxCol Step 1
oTS.WriteLine oWS.Cells(1, lCol) & "," & lStart & "," & lColWidths(lCol)
lStart = lStart + lColWidths(lCol)
Next lCol
oTS.Close

Set oTS = oFSO.CreateTextFile("C:\Test\Test3.txt")
For lRow = 2 To lMaxRow Step 1
sLine = vbNullString
For lCol = 1 To lMaxCol Step 1
If (lCol < 3) Then
sData = Left$(oWS.Cells(lRow, lCol) & Space(lColWidths(lCol)), lColWidths(lCol))
Else
sData = Right$(Space(lColWidths(lCol)) & oWS.Cells(lRow, lCol), lColWidths(lCol))
End If
sLine = sLine & sData
Next lCol
oTS.WriteLine sLine
Next lRow
oTS.Close
Set oTS = Nothing
Set oFSO = Nothing
MsgBox "Over"
End Sub

EXCEL VBA: Text Coding (verbatim coding)

Sub Text_Coding()

'Please save Keywords in column 1 of sheet2 and corresponding codes in column 2 in the same sheet
'Save Text in column 1 of sheet1 after running this macro it will generate codes in column 2 in this sheet

Dim txt As Variant
n = Sheet1.Cells.SpecialCells(xlCellTypeLastCell).Row
m = Sheet2.Cells.SpecialCells(xlCellTypeLastCell).Row
For i = 2 To n
Sheet1.Cells(i, 2) = ""
Set searchRange = Sheet1.Cells(i, 1)
For key_w = 2 To m
Set FoundCell = searchRange.Find(what:=Sheet2.Cells(key_w, 1))
If Not FoundCell Is Nothing Then
If (Len(Sheet1.Cells(i, 2)) > 0) Then
Sheet1.Cells(i, 2) = Sheet2.Cells(key_w, 2) & "," & Sheet1.Cells(i, 2)
Else
Sheet1.Cells(i, 2) = Sheet2.Cells(key_w, 2)
End If
End If
Next
Next
End Sub

Thursday, July 9, 2009

Statistics: Do the nonlinear regression results make sense?

http://curvefit.com/are_results_sensible_.htm

http://curvefit.com/

Satistics: Time-Critical Decision Modeling and Analysis

http://home.ubalt.edu/ntsbarsh/stat-data/Forecast.htm#rtestAvovaKS1

EXCEL: User Form Creation in Excel VBA

http://pubs.logicalexpressions.com/pub0009/LPMArticle.asp?ID=388

Statistics: Phi coefficient:Measure of the degree of association between two binary variables

http://www.childrensmercy.org/stats/definitions/phi.htm

Statistics: Online An Introductory Statistics Textbook

http://davidmlane.com/hyperstat/

Statistics: Identify the orders of both stationary and nonstationary ARIMA

http://www.caspur.it/risorse/softappl/doc/sas_docs/ets/chap7/sect56.htm

SAS: Proc Rank

data staff;
input ID SALARY Department;
datalines;
50 20000 1
10 18000 1
40 28000 2
20 16000 2
30 22000 3
70 26000 1
;run;

proc sort data=staff;by DEPARTMENT;run;

proc rank data=staff out=rankings;
By DEPARTMENT;
var SALARY;
ranks Finish;
run;

/* in Oracle or DB2 */

SELECT A.* FROM

(
SELECT ID, SALARY, DEPARTMENT,
RANK () OVER (PARTITION BY DEPARTMENT ORDER BY SALARY DESC) AS RANK
FROM staff s1
) AS A

WHERE A.RANK=1

Wednesday, July 8, 2009

Statistics: Don't believe in the Null Hypothesis?

http://creative-wisdom.com/computer/sas/hypothesis.html

Statistics: Assumptions of Linear Regression

http://homepages.ius.edu/WCLANG/m305/notes11.htm

Statistics: Bootstrapping - Building models with lesser data (<30)

Most of the times we often come across the problem of fitting models with lesser data (<30).

To overcome this problem we can use the following methodology:

Residual Resampling – Bootstrapping:

Bootstrapping is the practice of estimating properties of an estimator by resampling. Residual Resampling is one of the types of bootstrapping.

Steps:
OLS fit is computed from the original data.
The residuals are then resampled and the independent variables are fixed.
The residuals are then added to the predicted values of the original fit to obtain a new Y vector.
This new Y vector is then fit against the original X variables.

It is advised that we can have 100 bootstrap samples for the validity of the model. For each bootstrap sample the model is fit. Then calculate the mean of the parameter estimates and std errors. This will be our final estimate.

As sas don’t have the direct procedure to calculate this method, I manually coded the procedure. This is given below:

/* Model data set */

title 'Cement Hardening Data from Hjorth, p 31';
data cement;
input x1-x4 y;
label x1='3CaOAl2O3'
x2='3CaOSiO2'
x3='4CaOAl2O3Fe2O3'
x4='2CaOSiO2';
cards;
7 26 6 60 78.5
1 29 15 52 74.3
11 56 8 20 104.3
11 31 8 47 87.6
7 52 6 33 95.9
11 55 9 22 109.2
3 71 17 6 102.7
1 31 22 44 72.5
2 54 18 22 93.1
21 47 4 26 115.9
1 40 23 34 83.8
11 66 9 12 113.3
10 68 8 12 109.4

;
run;

/*Original fit to get the residuals */
proc reg data=cement;
model y=x1-x4;
output out=cemout r=resid p=pred;
run;

/*Resampling the residuals*/

data spds_wrk.ABC_boot(drop=resid) Spds_wrk.ABC_resid(keep=resid index);
set cemout;
index=_N_;
run;
data spds_wrk.ABC_OUTDATA (drop= i j);
do i = 1 TO 100;
sample_n = i;
do j = 1 TO 13;
index=j;
join_index = int(ranuni(123) * 13) + 1;
output;
end;
end;
run;

data spds_wrk.ABC_model_data;
merge spds_wrk.ABC_boot spds_wrk.ABC_OUTDATA;
by index;
run;

data spds_wrk.ABC_model_data;
merge spds_wrk.ABC_model_data spds_wrk.ABC_resid(rename=(index=join_index));
by join_index;
run;

proc sort data=spds_wrk.ABC_model_data;
by sample_n index;
run;

data spds_wrk.ABC_updated;
set spds_wrk.ABC_model_data;
ycap=pred+resid;
run;

/*Buliding the model with changed y 100 times */

proc reg data=spds_wrk.ABC_updated outest=pred;
model ycap=x1 x2 x3 x4;
by sample_n;
run;

/* Final Estimates*/
proc means data=pred;
var intercept x1 x2 x3 x4;
run;


The mean of Intercept, x2, x2, x3, and x4 will be the final estimates and its std deviations are the final std errors.

http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/bootfit.htm
http://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

UNIX: Commands

http://www.ccsf.edu/Pub/Fac/vi.html

Excel: Excel Chart to Power Point

http://www.ozgrid.com/forum/showthread.php?t=20999&page=5

http://peltiertech.com/Excel/XL_PPT.html#chartppt

Excel: Excel Chart to Power Point

http://www.ozgrid.com/forum/showthread.php?t=20999&page=5

Excel: Boxplots in Excel

http://www.coventry.ac.uk/ec/~nhunt/boxplot.htm

Excel: Dynamic Chart in Excel

http://peltiertech.com/Excel/Charts/DynamicColumnChart1.html

SAS: Running Excel Macro From SAS

options noxwait noxsync;
x '"C:\Program Files (x86)\Microsoft Office\Office12\excel.exe"
"C:\Documents and Settings\Sobhan\Desktop\book2.XLS"'; *Pls choose appropriate files 1.to enable excel, 2. open a specified excel file;

data _null_;
x=sleep(5); *for opening Excel it take some time;
run;

filename EXPORT DDE "EXCELSheet1!r1c1:r3c2" ;

data exp;
input name$ marks;
cards;
abc 46
xyz 28
bcd 60
;

data _null_;
set exp;
file EXPORT;
put name marks;
run;

filename savec DDE 'EXCELSYSTEM'; *relating between Excel and SAS;

DATA _NULL_;
FILE savec;
PUT '[run("macro1")]'; *Pls write a macro named macro1 in excel and put macro security in last option i.e.High;
PUT '[save()]';
PUT '[quit()]';
RUN;

SAS: String Reverse in SAS

data test;
input text $10.;
datalines;
srikanth
boyina
;run;
%let DataName = test;
%let Variable = Text;
%let length = 32;
%macro rever;
data New;format &Variable._New $32.;
set &DataName.;&Variable._New="_"Put(_N_,z5.0)&Variable.;
run;
data New; set New end = last;
if last then call symput ("N",_n_);run;
data b;
set New;
%do i=7 %to &length.;
_column_&i.=substr(&Variable._New,&i.,1);
%end;
run;
proc transpose data=b out=test(drop=_name_);id &Variable._New; var _column_7-_column_&length.;run;
%do k = 1 %to &N.;
data _null_; set New;
if _n_ = &k. then call symput ("V",&Variable._New);
run;
proc sql noprint;select &V. into :var separated by "" from test order by &V.; quit;
data New;
format SortVar $32.;
set New;
if _n_ = &k. then SortVar = "&var.";
run;
%end;
proc datasets nolist; delete b test; run; quit;
%mend;
%rever;

Excel VBA: List all File Names in a Folder

Sub ListFiles()
Dim oFSO As New FileSystemObject
Dim oFolder As Folder
Dim I As Long
Dim oFile As File
Set oFolder = oFSO.GetFolder("C:\test\teampics")
For I = 1 To oFolder.Files.Count
For Each oFile In oFolder.Files
MsgBox oFile.Name
Next
Next I
MsgBox "Over", vbInformation
End Sub

Tuesday, July 7, 2009

SAS: Factorial Program

%Macro Fact(n);
%let R = 1;
%if &n. = 0 %then %let R = 1;
%if &n. = 1 %then %let R = 1;
%if &n.>1 %then %do;
%do i = 1 %to &n. %by 1;
%let R = %SysEvalf(&R.*&i.);
%end;
%end;

%Put &R.;
%Mend;
%Fact(0);

UNIX: JOB Scheduling

How to run at unix box : -
nohup /apps/sas/SAS_9.1/sas /gdr/home_dir/id/trans1.sas &


Scheduling
crontab.dat
01 17 * * * nohup /apps/sas/SAS_9.1/sas /gdr/home_dir/id/trans1.sas &
http://www.adminschoice.com/docs/crontab.htm

For appending to the server list of crons -

crontab < crontab.dat

For editing -> crontab –e
For to view -> crontab –l

Statistics: Multi-collinearity Variance Inflation and Orthogonalization in Regression

http://creative-wisdom.com/computer/sas/collinear.html

SAS: Creating Scatter Plots - (m*n Chart)

http://support.sas.com/documentation/cdl/en/procstat/59629/HTML/default/procstat_corr_sect033.htm

Online Seminars and Classes

http://www.ats.ucla.edu/stat/seminars/default.htm

SAS: Statistical tests using SAS

http://www.ats.ucla.edu/stat/sas/whatstat/whatstat.htm

SAS: Topics in SAS Programming

http://www.cpc.unc.edu/services/computer/presentations/sasclass99/

Keeping only the variables you want
Keeping only the observations you want
Retained variables
Working with grouped observations
Introduction to merging in SAS
SAS arrays
Restructuring a SAS data set: from a mother file to a child file
Restructuring a SAS data set: from a child file to a mother file
SAS Work Environment
SAS Comments
SAS System Options
Basic SAS Procedures - PROC CONTENTS
Basic SAS Procedures - PROC FREQ
Basic SAS Procedures - PROC PRINT
Processing a Limited Number of Observations
SAS Output Delivery System (ODS)
Output SAS Results in HTML (Excel) Files Using ODS
Using SAS Efficiently with Very Large Datasets
ML Sampling Weights


http://www.cpc.unc.edu/services/computer/presentations/sasclass99/

SAS: 2 level Combination

data abc;
input name $10.;
datalines;
Srikanth
Sarbadal
Boyina
Pal
Debajit
Ghosh
;
run;
proc sql noprint;select name into :var separated by " " from abc;
proc transpose data=abc out=result;var name;run;

data comb;
keep var1 var2;
array a(6) $ col1-col6;
set result;
do i=1 to 6;
do j=i+1 to 6;
var1=a(i);
var2=a(j);output;
end;
end;
run;

Monday, July 6, 2009

SAS: Replace Missing with Zeros - only for all numeric variables

data test;
format z date8.;
input x y $ z mmddyy10. w;
datalines;
12 srikanth 3-01-1988 8.9
34 kanth 2-7-2009 8.34
12 srikanth 3-01-1988 8.9
34 kanth 2-7-2009 8.34
. srikanth 3-01-1988 8.9
34 kanth 2-7-2009 8.34
;
run;
/* Please provide data set name over here
it will create new data set as copy_old data set name */
%let orginal_dataset=test;
proc contents data=&orginal_dataset. out=data_map(keep=name type format);run;
data data_map;set data_map;
if (type=1 and format ="");run;
data _null_;set data_map;
call symput("n",_N_);
call symput('Var'left(_n_), name);run;
%macro missing_with_zero;
data copy_&orginal_dataset.;set &orginal_dataset.;
%do i=1 %to &n.;
if (&&var&i. eq .) then &&var&i.=0;
%end;run;
%mend;
%missing_with_zero;

SAS: Sending Attachment


data test;
input x;
datalines;
12
23
;
run;


%LET sri_file = /gdr/home_dir/z062709/;


ODS CSV FILE="&sri_file.test.csv";
PROC PRINT DATA= test NOobs label; RUN;
ODS CSV close;

FILENAME OUTBOX EMAIL boyina.srikanth@gmail.com;

DATA _null_;

FILE OUTBOX
TO=("srikanthkumar.boyina@target.com")
SUBJECT="Data From SAS"
ATTACH="&sri_file.test.csv";
RUN;

SAS: Delete Formates

data test;
set data ;
attrib _all_ label='';
format _all_;
informat _all_;
run;

SAS: Grid Table

data test;
keep a1-a1260 Weight;
set sri.target_home;
a1=qc6_Bath_JCpenney_1;
a2=qc6_Bath_JCpenney_2;
a3=qc6_Bath_JCpenney_3;
a4=qc6_Bath_JCpenney_4;
a5=qc6_Bath_JCpenney_5;
a6=qc6_Bath_JCpenney_6;
a7=qc6_Bath_JCpenney_7;
a8=qc6_Bath_JCpenney_8;
a9=qc6_Bath_JCpenney_9;
a10=qc6_Bath_JCpenney_10;
run;
%macro tet;
proc freq data=test;
%do i=1 %to 100;
table a&i./out=test_a&i.(rename=(a&i.=id count=a&i.) drop=percent);
%end;
WEIGHT Weight;
run;
data final;
merge
%do i=1 %to 100;
test_a&i.
%end;;
by id;
run;
%mend;
%tet;
proc transpose data=final out=t123;
run;

SAS: Converting Char format to date format

data test;
input tes $20.;
datalines;
20090406
;
run;

data ttt;
format dd date9.;
set test;
dd=input(tes,ANYDTDTE11.);
run;

SAS: Read in any type of date format

Hou to read/extract any type of date using ANYDTDTE informat.
The following example illustrates this.

/* Read in raw data – various types of date format */

Data dates;
Input cdate $22.;
Cards;
16-apr-07
01-02-07
2007-05-06
02-jun-07
13-sep-2007
01JAN2009 14:30:08.5
;
Run;

/* Convert them to required date format using AnydtdteW */

Data Convert;
Set dates;
Date = Input (cdate, ANYDTDTE21.);
Format date date9.;
Run;

/* Output */

16APR2007
02JAN2007
06MAY2007
02JUN2007
13SEP2007
01JAN2009

This way you can read in any type of date format into SAS without having to worry about the right informat!

SAS - how to ‘Extract multiple matching patterns using Perl regular expressions (PRX) from a text string’.

The regular expressions are:

1) \d refers to any digit (0 - 9),
2) \D to any non-digit, and
3) \w to any word character (A-Z, a-z, 0-9, and _).
4) The three metacharacters, *, +, and ? are particularly useful because they add quantity to a regular expression.*/

/*The below example extracts phone numbers from a string */

/*To identify a phone number use the below expressions **/
\( matches an open parenthesis ; \d\d\d matches three digits
\) matches a closed parenthesis; ? matches zero or more occurances of the previous pattern
\d\d\d matches three digits; - matches a dash
\d{4} matches four digits */

/***************************************************************/
DATA EXTRACT;
IF _N_ = 1 THEN DO;
PATTERN = PRXPARSE ("/\(\d\d\d\) ?\d\d\d-\d{4}/");
END;
RETAIN PATTERN;
LENGTH NUMBER $ 15;
INPUT STRING $CHAR80.;
CALL PRXSUBSTR(PATTERN,STRING,START,LENGTH);
IF START GT 0 THEN DO;
NUMBER = SUBSTR(STRING,START,LENGTH);
NUMBER = COMPRESS(NUMBER," ");
OUTPUT;
END;
KEEP NUMBER START LENGTH;
DATALINES;
THIS LINE DOES NOT HAVE ANY PHONE NUMBERS ON IT
THIS LINE DOES: (123)345-4567 LA DI LA DI LA
ALSO VALID (123) 999-9999
TWO NUMBERS HERE (333)444-5555 AND (800)123-4567
;
/**************************************************************/

The ‘PRXPARSE’ function is used to create regular expressions.
Syntax: PRXPARSE (perl-regular-expression)
The ‘PRXSUBSTR’ function returns the length of the match as well as the starting position.
Syntax: PRXSUBSTR(pattern-id, string, start, )


Run the above code in SAS to understand how this works.

For more details visit:
http://support.sas.com/documentation/cdl/en/lrdict/59540/HTML/default/a002288677.htm

SAS: Proc Sql like _N_ in DATA step

Proc Sql;
Select monotonic() as rownum , a.*
From sashelp.class a
Where monotonic() <5;
Quit;

SAS: Identify duplicate and nonduplicate observations in a SAS data set

We can achieve this by using DUPOUT= option on the PROC SORT statement. /* The below code creates the duplicate observations in a new data set called dups while the remaining observations stay in no_dup */
/***********************************************************/
Proc Sort data=sashelp.class out=no_dup dupout=dups nodupkey;
by age;
Run;

/***********************************************************/

SAS LOG:
NOTE: There were 19 observations read from the data set SASHELP.CLASS.
NOTE: 13 observations with duplicate key values were deleted.
NOTE: The data set WORK.NO_DUP has 6 observations and 5 variables.NOTE: The data set WORK.DUPS has 13 observations and 5 variables.
Note:
The DUPOUT option is effective only when used with the NODUPKEY or NODUPREC/NODUP options. Without one of
these options, the log will show a WARNING message and the DUPOUT data set will be created with 0 records.

UNIX: How to kill/pause/break jobs in Unix

1) To display full information about each of the processes currently running.
ps –fu USERID

2) To pause/break the currently running job and resume.
Kill –s STOP - To Pause the job
Kill –s CONT - To Resume the paused job

3) To display how much free disk space is available for each file system you have
df –h

4) The below command displays how much space a file occupies.
du –kh

5) To compress a file
compress FILENAME

Use the last two tips to efficiently manage your disk space. Delete the files you don’t require, or that can be regenerated easily. If you need to keep them, compress and store them.

SAS: Create multiple datasets using a single Proc Means Procedure

e.g., we want to analyze data based on the following 3 combinations. Analysis Variable is fraud_amount. The 3 classification variables being
By fraud_type
By year and fraud_type
By year, month and fraud_type

Proc Means data=sales_data chartype;
Class year month fraud_type;
Var fraud_amount ;
Output out=fraud_type (where= (_type_='001') drop=year month) sum=; /*By fraud_type*/
Output out=year_fraud_type (where =(_type_='101' and year=2008) drop=month) sum=;
Output out=yr_month_fraud_type (where =(_type_='111')) sum=;
Run;

Chartype - converts the (default) numeric values of _TYPE_ to a character variable containing zeros and ones. The length of this variable is equal to the number of variables in the CLASS (or BY) statement.

_type_ - indicates which classification variables SAS should consider for analysis. i.e. _type_ = ‘1’ instructs SAS to consider the variable & ‘0’ instructs SAS to ignore the variable.


This way we do not have to have separate proc means for each classification, but we can combine them all in one step!!

SAS: IDENTIFY AND EXTRACT ALL EXCEL FILE NAMES IN A SPECIFIED FOLDER in SAS

In SAS, there are various functions to process external files. These functions can open a
directory, determine the number of members of the directory, identify the file names that can be used to process the
data, and close the directory. File processing can be done dynamically without ever knowing the physical file name.

Few functions are DOPEN, DNUM, DREAD

DOPEN - This external file function opens a directory using a file reference as the argument and returns a numeric directory identifier value. If the value returned is a 0, then the directory was not found. Otherwise, a returned value greater than 0 identifies the opened directory and can be used in other SAS external file functions.

DNUM - This external file function returns the number of members in a directory using the directory identifier returned by the DOPEN function.

DREAD - This external file function uses the identifier returned by the DOPEN function and returns a directory member name. By using the value of the total number of files in a directory returned by DNUM, all files in a directory can be processed in a loop.


Example - IDENTIFY AND EXTRACT ALL EXCEL FILE NAMES IN A SPECIFIED FOLDER - Assume you have a folder which contains many files of different types and you want to extract the file names of one particular type (e.g., Excel workbooks); The following DATA step will extract their file names:



DATA xls_files(keep=file_name);
LENGTH file_name $30 ;
rc= filename("dir","~");
d=DOPEN("dir");
n=DNUM(d);
do i=1 to n;
file_name=DREAD(d,i) ;
if LOWCASE(SCAN(file_name,2,'.')) EQ 'xls' then output;
end;
run;

SAS: How to create password protected SAS Datasets through EG

/*Assigning password "green" to dataset "testpass"*/
proc datasets lib = work;
modify testpass (pw=green);
run;


/*Change Existing password "green" to "red" for dataset "testpass"*/
proc datasets lib = work;
modify testpass (pw=green/red);
run;

/*Removing password "green" from dataset "testpass"*/
proc datasets lib = work;
modify testpass (pw=green/);
run;

/*Reading into another dataset*/
data testpass_next;
set testpass(read=red);
run;

SQL: How to fetch only 10 rows from different servers

Database SQL Syntax
DB2 ----> select * from table fetch first 10 rows only
Informix ---->select first 10 * from table
MS SQL Server and Access ----> select top 10 * from table
MySQL and PostgreSQL ----> select * from table limit 10
Oracle 8i ----> select * from (select * from table) where rownum <= 10