MSSQL - Extract Security Objects

DROP

-- Script to extract and DROP Database Security Objects from a refreshed database before putting in place the correct permissions

-- Generates (as result-set rows) the REVOKE / ALTER ROLE ... DROP MEMBER /
-- DROP USER statements needed to strip security objects from the current
-- database. The generated statements are SELECTed as SQLTEXT, not executed:
-- capture the output, review it, then run it separately.
declare
    @DBName sysname,           -- current database name, baked into the generated USE statements
    @sql_perms nvarchar(max);  -- dynamic SQL that produces the SQLTEXT result set

set @DBName = DB_name();

set @sql_perms = N'

-- Object Permissions: one guarded REVOKE per object-level permission

select
(''USE ' + QUOTENAME(@DBName) + N';
IF EXISTS (Select 1 from sys.objects where name = '''''' + Object_name(major_id) + ''''''
INTERSECT SELECT 1 from sys.database_principals where name = '''''' + USER_NAME(dp.grantee_principal_id) + ''''''
)
REVOKE '' + permission_name + '' ON ['' + SCHEMA_NAME(SO.schema_id) + ''].['' + OBJECT_NAME(dp.major_id) +
''] FROM ['' + USER_NAME(dp.grantee_principal_id) + '']'') Collate Database_default as SQLTEXT
from sys.database_permissions dp
INNER JOIN sys.database_principals dps
ON dp.grantee_principal_id = dps.principal_id
INNER JOIN sys.objects SO
ON dp.major_id = SO.object_id
WHERE dps.name NOT IN (''public'', ''guest'')
and dps.type != ''R''

UNION ALL


-- Role Memberships: drop each member from its role, guarded by existence check

select
(''USE ' + QUOTENAME(@DBName) + N';
If exists (
select 1 from sys.database_role_members
where user_name(role_principal_id) = '''''' + user_name(DRM.role_principal_id) + ''''''
and user_name(member_principal_id) = '''''' + user_name(DRM.member_principal_id) + ''''''
)
Alter role ['' + USER_NAME(DRM.role_principal_id) + '']
DROP MEMBER ['' + USER_NAME(DRM.member_principal_id) + '']'') Collate Database_default as SQLTEXT
from sys.database_role_members DRM
INNER JOIN sys.database_principals DP
ON DRM.member_principal_id = DP.principal_id
where DRM.member_principal_id > 1

UNION ALL


-- Database Users: principal_id > 4 skips the fixed principals
-- (dbo, guest, INFORMATION_SCHEMA, sys); users that still own schemas are
-- excluded because DROP USER would fail for them

select
(''USE ' + QUOTENAME(@DBName) + N';
If exists (select 1 from sys.database_principals where name = '''''' + name + '''')
drop user ['' + name + '']'') Collate Database_default as SQLTEXT
from sys.database_principals
where principal_id > 4
and type IN (''S'', ''U'', ''G'')
and name not in (
select user_name(principal_id) from sys.schemas
where schema_id in (select o.schema_id from sys.objects o, sys.schemas s where s.schema_id = o.schema_id and o.schema_id between 5 and 16383)
and schema_id between 5 and 16383
and principal_id between 5 and 16383)

';

print 'Executing in database: ' + @DBName;
exec sp_executesql @sql_perms;


CREATE

-- Script to extract Database Security Objects for current database before a refresh

-- Generates (as result-set rows) the CREATE USER / CREATE ROLE /
-- ALTER ROLE ... ADD MEMBER / permission statements that recreate the
-- current database security objects after a refresh. The statements are
-- SELECTed as SQLTEXT, not executed: capture the output and replay it.
declare
    @DBName sysname,           -- current database name, baked into the generated USE statements
    @sql_perms nvarchar(max);  -- dynamic SQL that produces the SQLTEXT result set

set @DBName = DB_name();

set @sql_perms = N'

-- Database Users
-- NOTE(review): generated CREATE USER has no FOR LOGIN / WITH DEFAULT_SCHEMA
-- clause; confirm the matching logins exist on the target server

select
(''USE ' + QUOTENAME(@DBName) + N';
If not exists (select 1 from sys.database_principals where name = '''''' + dp.name + '''''')
Create user ['' + dp.name + '']'') Collate Database_default as SQLTEXT
from sys.database_principals dp
where dp.principal_id > 4
and dp.type IN (''S'', ''U'', ''G'')

UNION ALL


-- Roles: user-defined database roles and application roles only

select
(''USE ' + QUOTENAME(@DBName) + N';
If not exists (select 1 from sys.database_principals where name = '''''' + dp.name + '''''' AND type = ''''R'''')
Create role ['' + dp.name + '']'') Collate Database_default as SQLTEXT
from sys.database_principals dp
where dp.type IN (''R'', ''A'')
and dp.name <> ''public''
and dp.is_fixed_role <> 1

UNION ALL


-- Role Memberships
-- FIX: the existence check previously compared the integer columns
-- role_principal_id / member_principal_id against bracketed names, which is
-- invalid; compare by user_name() as the companion DROP script does

select
(''USE ' + QUOTENAME(@DBName) + N';
If not exists (
select 1 from sys.database_role_members
where user_name(role_principal_id) = '''''' + user_name(DRM.role_principal_id) + ''''''
and user_name(member_principal_id) = '''''' + user_name(DRM.member_principal_id) + ''''''
)
Alter role ['' + USER_NAME(DRM.role_principal_id) + '']
ADD MEMBER ['' + USER_NAME(DRM.member_principal_id) + '']'') Collate Database_default as SQLTEXT
from sys.database_role_members DRM
INNER JOIN sys.database_principals DP
ON DRM.member_principal_id = DP.principal_id
where DRM.member_principal_id > 1

UNION ALL


-- Object Permissions
-- NOTE(review): state_desc for a grant made WITH GRANT OPTION is
-- GRANT_WITH_GRANT_OPTION and will not replay as-is; verify whether any exist

select
(''USE ' + QUOTENAME(@DBName) + N';
'' + dp.state_desc + '' '' + dp.permission_name + '' ON ['' +
SCHEMA_NAME(o.schema_id) + ''].['' + OBJECT_NAME(dp.major_id) + '']
TO ['' + USER_NAME(dp.grantee_principal_id) + '']'') Collate Database_default as SQLTEXT
from sys.database_permissions dp
INNER JOIN sys.database_principals grantee
ON dp.grantee_principal_id = grantee.principal_id
INNER JOIN sys.objects o
ON dp.major_id = o.object_id
WHERE grantee.name NOT IN (''public'', ''guest'');

';

PRINT 'Executing in database: ' + @DBName;
EXEC sp_executesql @sql_perms;

MSSQL Interview Questions

MSSQL:

Q) What are new features of SQL Server 2012/2014/2016/2017/2019/2022?

A) SQL Server 2012 introduced Always On which includes availability groups (a clustered group of databases in a server), Contained Databases with their own separate usernames and passwords making it easier to migrate between servers, ColumnStore indexes for better read-only performance (they are like sybase iq DWH indexes), and also enhancements to the ShowPlan output

SQL Server 2014 introduced in memory OLTP where you can place suitable tables and stored procedures into memory, giving up to 30x better performance. It also has much more integration with Azure for backups and availability groups etc. ColumnStore indexes are now updateable. SSD Buffer Pool extensions can improve performance dramatically. Incremental statistics only update for new and changed rows then merge with existing stats, much faster. Delayed Durability offers a more relaxed logging option which speeds up transactions but at the cost of recoverability.

SQL Server 2016 introduced Query Store which maintains a history of query execution plans with their performance data. Polybase, a SQL Server connector to Hadoop. Stretch Database where some part of your tables (configurable or automated) will be moved into an Azure SQL Database in the cloud. Java Script Object Notation (JSON) support built in. Row Level Security in tables. Always Encrypted is new functionality through the use of an enhanced client library at the application so the data stays encrypted in transit, at rest and while it is alive in the database. Enhancements to In Memory functionality. Temporal tables, also named system-versioned tables, allow SQL Server to automatically keep history of the data in the table

SQL Server 2017 introduced the ability to run it on Linux and Docker containers in addition to Windows. It has Graph Database Capabilities which allows you to more efficiently store and query complex relationships between data. Phyton support built into the database engine. Resumable Online Index Rebuilds, useful for large index rebuilds where you have a small maint window. New CLR security model. Adaptive Query processing to improve Query processing over time by adjusting query plans where necessary. Scaling out Integration Services to allow you to run a package across multiple machines.

SQL Server 2019 introduced Intelligent Query Processing enhancements, this includes dynamic memory grants for rowstore tables, table variable deferred compilation, batch mode on rowstore and more. Accelerated Database Recovery dramatically decreases the time needed to bring a database online on startup/recovery. Enhancements to Always On Encryption with Secure Enclaves. Memory-Optimized Tempdb metadata which puts some key tempdb metadata into memory and optimizes it. Polybase expanded to support Oracle, Teradata, MongoDb and more. There are also multiple other Internal Performance Improvements like Index optimization, temp table recompilation, checkpoint scalability etc.

SQL Server 2022 introduces Azure Synapse Link which allows you to gain real-time insights without performance degradation of the SQL database. You can Link/Pair with Azure Managed Instances which gives the best of on-premise and Cloud Servers including simplified DR solutions. There is also Parameter Sensitive Plan Optimization (PSPO) that optimizes the performance of SQL queries with parameters. Microsoft Purview integration will help you understand, discover and manage data from various sources. Ledger, when this feature is enabled an old version of your tables will be saved when changes are made, secured using Blockchain. Security Improvements: Microsoft Defender for Cloud integration, Microsoft Purview integration, Ledger, Azure Active Directory Authentication, Always Encrypted with secure enclaves, Access Control, Dynamic data masking, Support for PFX certificates & MS-TDS 8.0 Protocol.


Q) If someone has a problem with a spid, how would you investigate it?

A) Use the activity monitor, and perhaps dbcc inputbuffer, dbcc opentran to show the longest running transaction. You can also start a trace on that spid directly from the activity monitor.


Q) How do you change the default location of a database?

A) You can specify the default location for data and log files in the server properties, database settings.


Q) What are the different types of SQL Authentication?

A) There is SQL Server Authentication and Windows Authentication. Can possibly also use other methods like LDAP via 3rd party tools and with Azure you have username and password, multi-factor authentication and federated authentication via Azure AD.


Q) What is a dbcc and why would you use it

A) DBCC stands for Database Console Commands and are grouped into Maintenance tasks, Informational, Miscellaneous and Validation operations.  The most familiar is dbcc checkdb to check consistency but there is also dbcc showcontig to check fragmentation of a table and dbcc show_statistics to show statistics amongst many others.


Q) What is the difference between dbcc dbreindex and dbcc indexdefrag?

A) dbcc dbreindex is an offline procedure, dbcc indexdefrag is performed online.


Q) What is the difference between rebuilding an index and reorging an index

A) Reorg only changes the leaf level of the index, much quicker than rebuilding the whole index.


Q) How would you check the statistics for a table?

A) Run dbcc show_statistics against it.


Q) What are statistics and why are they so important?

A) Statistics, Index and column, are used by the optimiser to choose the optimal execution plan for a query.


Q) What kind of information do the statistics contain?

A) Statistics contain information on the cardinality and data distribution of data in a table or index.


Q) What is the UPDATE_STATISTICS command for?

A) UPDATE STATISTICS updates the query optimization statistics on a table or indexed view. This should be handled automatically by the server but sometimes you need to run it manually to improve the query plans being produced, especially on busy tables with lots of changes to the data.


Q) What things would you configure on the analysis server, high level?

A) Increase minimum allocated memory, increase read-ahead buffer size. Increase Process buffer size, enable Proactive caching, maybe increase maximum number of threads.


Q) There are a few options which MSSQL server lets you set which should not be set on a production server?

A) You should NOT enable Auto Shrink or Auto Close and a production database should typically be set to full or bulk logged recovery model NOT simple.


Q) What services can be clustered in SQL Server 2008?

A) SQL Server, SQL Server agent, Analysis services and Reporting services.


Q) What resources do you set up against a SQL server resource in a cluster?

A) SQL IP Address, SQL Network Name, SQL Server, SQL Server Agent, Disk Resources and maybe Full Text and Analysis Services if required.


Q) How many IP addresses would a 2-node cluster with 1 SQL Server instance need?

A) For an Active/Passive cluster you would need 7:
1 IP for Windows Cluster, 1 IP for SQL Server Virtual Network Name, 1 IP for MSDTC (this is optional), 2 IP’s for public network (1 per node), 2 IP’s for heartbeat network (1 per node)


Q) The SQL Server service should be configured with which startup type if in a Cluster?

A) Manual


Q) What is the difference between an active/active and active/passive cluster?

A) In active/active both nodes have instances on them; in active/passive one node is in an idle standby state.


Q) What resources should NOT have “affect group” set in the cluster?

A) The only one which should not have “affect group” set is SQL Server Agent.


Q) What is the difference between an offline group operation and a move group operation?

A) An offline group operation takes an online group offline whilst a move group operation moves a group and all its resources from one node to another.


Q) If you offline the group and reboot the node what happens to the group when the box comes back up again?

A) It will remain offline until it is explicitly put online again.


Q) Explain the difference between ISAlive and LooksAlive and where can it be configured?

A) LooksAlive checks that the SQL Server service is up and running. If it fails then IsAlive is run which actually logs into the sql server, in case that fails the cluster will first try to reboot on the same node, if that doesn’t work it will fail the SQL server over to the next node. The polling intervals etc can be adjusted/configured in the Cluster service properties.


Q) How do you start a SQL Server from the command prompt in single user mode?

A) To start the default instance it's sqlservr.exe -m; for a named instance it is sqlservr.exe -m -s <servername>.


Q) What is the purpose of the SQL Agent

A) It controls Jobs and Alerts.


Q) How can you set the memory of the server to not go above 3GB?

A) In server properties set the maximum server memory to 3GB.


Q) How would you move the tempdb?

A) Execute the following 2 commands: alter database tempdb modify file (name=tempdev|templog, filename=’c:\Sqldata\tempdb.mdf|templog.ldf’), then restart the server and the tempdb will be created on the new devices in the new location, you can then remove the old devices.


Q) How would you optimise tempdb?

A) You should ensure there are half as many tempdb data files as you have cpu cores, this cuts down on Allocation contention. Another option is to place the tempdb onto an SSD. Can also disable auto update stats and auto create stats but need to be careful doing this.


Q) What is the default SQL Server port number?

A)  1433


Q) In SQL Server 2005 onwards what is port 1434 used for?

A) The SQL Server browser uses, by default, port 1434 to listen for incoming connections to named services which use dynamic ports. It is also used for the DAC connection.


Q) What is DAC and what port does it use?

A) It is a special Diagnostic connection for administrators when standard connections to the server are not possible. You can connect to it by prefixing admin to the instance name in SSMS or by using the –A switch for sqlcmd. DAC listens for connections on port 1434.


Q) Are there any restrictions on using DAC?

A) Yes, you can only have one DAC connection to a server at a time and you can’t execute backup and restore commands.


Q) How would you find the port that DAC is listening in on?

A) You can find this information in the SQL Server log, “Dedicated admin connection support was established for listening locally on port ….”


Q) How do you change the static port number for a named instance?

A) In SSCM You need to set the TCP Port under “IP All” to the required port value AND remove any entries for TCP Dynamic Ports.


Q) Where is logging information kept within sql server?

A) You can access the logs from the SQL Server management Studio, both SQL Server, System and SQL Server Agent logs can be viewed but the actual physical logs are on the file system.


Q) Describe what an index is and how it works?

A) Indexes speed up the querying process by providing swift access to rows in the data tables. A Database index works in much the same way as a Book’s Index. If you want to find a particular subject/word then you go to the back of the book, check the index for that word and then go to the page indicated. It’s exactly the same in a database, you would go to the index search for the item and then go to the data page indicated.  A clustered index goes a bit further than this as it forms part of the actual table, in terms of a book you can think of it as being the chapter, headings and subheadings of a book with the pages being the actual leaf level of the index.


Q) What are the differences between Clustered and Non-Clustered indexes?

A) A clustered index defines the order in which data is physically stored in a table, it forms and orders the table where the leaf level is the actual data page. Table data can be sorted in only one way, therefore, there can be only one clustered index per table.
A Non-clustered index is separate from the table and just points to the relevant pages within the table, therefor you can have multiple non-clustered indexes for one table.


Q) What types of indexes can you have?

A) The main ones are non-clustered, clustered, Unique and ColumnStore. Can also have index with included columns, index on computed columns, filtered, spatial, xml and Full-text. Of course you don’t have to have an index at all, if you don’t then the table is referred to as a heap table.


Q) Can you create indexes on views?

A) Yes


Q) Which is better an INDEX SEEK or an INDEX SCAN?

A) An Index Seek is better especially for larger tables, an Index Scan is like a full table scan, but only on the columns in the index so it’s a bit better than a full table scan.


Q) What is a covering index?

A) A Covering index is an index that contains all the columns you need for a query.


Q) In a query what would be the difference between columns which are part of a covering index and “included columns”?

A) If the column is not needed in the WHERE/JOIN/GROUP BY/ORDER BY, but only in the column list of the select statement then you can add it as an included column, this results in a smaller and more efficient index.


Q) What index is the best choice for a date range search?

A) Clustered Index.


Q) How would you display fragmentation information for a specific table?

A) Use dbcc showcontig command against the table or index.


Q) How would you display fragmentation information, other than with dbcc showcontig?

A) The newer method is to use the dmv sys.dm_db_index_physical_stats


Q) What is the difference between internal and external table fragmentation?

A) Internal fragmentation is unwanted free space on a page caused by deletes/updates/inserts. External fragmentation is when the logical order of index pages varies from their physical organization in the data file, caused by page splits with pages linking back and forth between extents.


Q) What is the difference between fragmentation in a traditional index compared to a columnstore index?

A) A traditional b-tree index is considered fragmented if the physical order of pages is out of sync with the key order or if the data or index pages are only partially filled. A columnstore index is considered fragmented if more than 10% of the rows are marked as deleted.


Q) What is segment elimination as related to columnstore indexes?

A) Each partition in a columnstore index is broken into one million row chunks called segments. Each segment has metadata that stores the minimum and maximum value of each column for the segment. The storage engine checks filter conditions against the metadata. If it can detect that no rows will qualify then it skips the entire segment without even reading it from disk which means much less IO required in searches.


Q) What command would you use to check the integrity of a database, what option to check the physical allocation only?

A) You would use dbcc checkdb and if you only want to check the physical allocation then you’d add the Physical_only option, i.e dbcc checkdb (‘dbname’) with physical_only.


Q) What is dbcc checkdb actually checking in terms of the data etc?

A) It runs the following:
DBCC CHECKALLOC Checks consistency of disk space allocation structures
DBCC CHECKTABLE on every table and view in the database – Checks the integrity of all the pages and structures that make up the table or indexed view
DBCC CHECKCATALOG on the database – Checks for system catalog (a set of tables that store metadata about objects in the database) consistency within the database.
Validates the contents of every indexed view in the database


Q) What is locking/blocking and how would you get information on locks?

A) Locking/Blocking is the process of ensuring data consistency in a database, so for example as one process is updating a row of data it will not allow another process to update it at the same time. You can get information on locks by using the sp_lock stored proc, querying the dm_tran_locks DMV and for more detail by running SQL Profiler.


Q) What is blocking and how would you find information on it?

A) Blocking is when a request tries to access pages/rows which are locked by another request, a good dmv for spotting this is sys.dm_os_waiting_tasks which returns information about the wait queue of tasks waiting on some resource.


Q) What are traceflags and when would you use them?

A) Trace flags are used to set specific server characteristics. For example it can be used to print out extra information about deadlocks with traceflags 1204 and 1222. They are either set at startup with the –T option or can be explicitly started with dbcc traceon(<traceflag number>).


Q) What are dynamic management views?

A) DMV's were introduced in SQL Server 2005 and provide a simple way for assessing the state of a server and gathering diagnostic information without placing too much overhead on the SQL Server.


Q) Name 3 useful dmv’s

A) dm_exec_query_stats, dm_exec_session_wait_stats, dm_db_session_space_usage


Q) Which DMV can give you a query plan?

A) There are many different categories of Dynamic Management Views, they are;

              Dm_exec, dm_os, dm_tran, dm_io, dm_db, dm_repl, dm_broker, dm_fts.

These are then selected from using a cross apply with various table functions like dm_exec_sql_text to get for example a list of the costliest stored procs and transactions running on the server.

The specific dmv used to view query plans is dm_exec_query_stats joined with the table function dm_exec_query_plan.


Q) What is the new system database in SQL Server 2005 and what is it used for?

A) It is called resource and holds all the system objects deployed with MS SQL server. The system objects include stored procs and system views which logically appear in each database. These objects were moved to a central location to make any upgrade process easier.


Q) What impact does the resource database have in terms of DBA support?

A) It is crucial as all system views and procedures are stored in it.


Q) How would you restore the system resource database?

A) You would need to restore/copy the database files for the Resource database to the location of the master database files.


Q) What are SOX systems? (relating to big environments and very secure systems which need special regulation and control)

A) The Sarbanes-Oxley Act (SOX) of 2002 sets new requirements for the integrity of the source data relating to financial transactions and reporting. To prove the integrity of data, companies must extend audit processes to the financial information stored within corporate databases. To verify regulatory compliance, auditors look at multiple aspects of a database environment including: user management, authentication, separation of duties, access control, and audit trail.


Q) What is the Distributed Transaction Co-ordinator (DTC) used for.

A) The DTC manages transactions across a network of Windows based systems. MS DTC allows:

– Distributed transaction support for Microsoft SQL Server.

– Updated data residing on two or more Microsoft SQL Server systems.

– It’s also a requirement for clustering to work.


Q) The log file just keeps on growing, why is that?

A) That would either be because you have a run-away transaction that would need to be identified and killed or more likely the database is set to full recovery mode but there are no jobs or maintenance plans set up for backing up and truncating the transaction log.


Q) Steps to optimise a badly running SQL Server query?

A) You need to identify the query and extract the query plan for it with sp_showplan (spid) or by running a trace against the spid. You could also use dm_exec_query_stats together with the table function dm_exec_query_plan.

In most cases adding an extra index will help but sometimes you will need to completely rewrite the query to perhaps use temporary tables etc for sorting.


Q) What operators on a query plan would alert you to possible performance issue

A) Table scans, Sorts and hashes are the most expensive operators, also any big mismatch between expected and actual row counts would indicate poor and out of date statistics which can be fixed by running an update stats or even creating new statistics.


Q) You get a complaint that the database is slow today. What are the first things you do?

A) The first thing to do is to log into the SQL Server and then open Activity monitor to see what is currently running on the SQL Server. Specifically, it’s a good idea to check if there is any locking or blocking occurring with sp_lock. I would also look at the overall load on the SQL Server and examine the underlying Windows server for any issues. Another useful thing is to examine if any indexes have been dropped recently or if there have been any other changes to data structures or data volumes. Fragmentation could also be an issue.


Q) How would you find the longest running queries with SQL Server Profiler?

A) In SQL Profiler you would run a trace using the template SQLProfilerTSQL_Duration and look at the durations in the output. You could also use dmv’s, dm_exec_query_stats joined with the table functions  dm_exec_sql_text and dm_exec_query_plan if you also want to see the query plans being produced.  Or you can use the SSMS report on top queries.


Q) What is a primary key and a foreign key?

A) A Primary key uniquely identifies each record in a table, it must contain unique values and cannot contain NULL values. A Foreign Key in one table points to a Primary key in another table, it’s used as a constraint to protect referential integrity, in other words, only values that are supposed to appear in the database/tables are permitted.


Q) Can Unique keys store NULL values?

A) Yes they can which is one of the main ways they differ from primary keys.


Q) If you want to remove a table definition and its data, what command should you use?

A) DROP table


Q) What is the difference between the TRUNCATE and DELETE commands?

A) TRUNCATE is faster and uses fewer system and transaction log resources than DELETE. TRUNCATE removes the data by deallocating the data pages used to store the table’s data, and only the page deallocations are recorded in the transaction log. DELETE removes rows one at a time and records an entry in the transaction log for each deleted row.


Q) Which utility or tool is best at identifying long running transactions?

A) SQL Profiler or Extended Events.


Q) What are the pros and cons between Profiler and Extended Events?

A) The pro with profiler is the familiarity, but in all ways Extended events is better. Extended events places much less load on the server being examined and Extended Events offers more monitoring options, it has almost 8 times as many available events as Profiler.


Q) Which utility or tool is best for assessing disk queue length?

A) Performance Monitor (Perfmon).


Q) What counters would you use in perfmon to monitor disk performance?

A) The 2 best ones are Physical Disk-Avg Disc sec/Read and  Physical Disk-Avg Disc sec/Write which should both be below 0.01.


Q) What are the key metrics you would use for monitoring performance on SQL Server

A) The key metrics are Transact SQL (T-SQL) metrics, Batch requests per second, SQL compilations per second, Last elapsed time, Buffer cache metrics, Buffer cache hit ratio, Checkpoint pages per second and Page life expectancy. These together with Wait Stats will point you towards specific areas for further examination e.g. inadequate memory, slow Disks or insufficient CPU etc


Q) What is the Size of a SQL Server page and how many pages are in an extent?

A) Each page is 8KB, and each extent has 8 pages so 64KB in total.


Q) What percentage of the page is filled by the default server-wide fillfactor value?

A) 100%


Q) Why can a fill factor of 100% be bad, what happens with page splits etc?

A) If an existing row is updated and its length is increased, then it may not fit on to its original page and a page split occurs. In a page split, half the rows remain on the original page and the rest are moved to a new page. We now have two pages that are just over 50% full, with the result that we need additional I-O and memory to get the data from these pages. However, if you set it lower then you may still eventually get page splits after a lot of updates.
A fill factor of 100 is best for tables that have a cluster index based on a primary key that uses an Identity value. However, for tables based on other columns, especially tables with a primary key that is a GUID, then a different fill factor can be good.
In any case its important to check fragmentation of a table/index and perform reorg’s and index rebuilds on a regular basis to remove page splits.


Q) What trace flags should you activate to capture deadlock information in the SQL Server 2005 or later error log?

A) 1204, 1222


Q) How would you troubleshoot deadlocks in a SQL Server

A) Can use Traceflags 1204 and 1222, but the better approach is to use Extended events to capture the deadlocks which can then be analysed, and hopefully this will lead to the queries being optimised to avoid the issue going forwards.


Q) What is the difference between a deadlock and a livelock and where might a livelock occur in SQL Server?

A) A deadlock occurs when 2 or more tasks permanently block each other by each task having a lock on a resource which the other task is trying to lock. SQL Server will kill one of the tasks to free the other one(s). A Livelock is where a request for an exclusive lock is denied continuously because of a series of overlapping shared locks which keep on interfering with each other and changing state which stops them from finishing, SQL Server automatically detects the situation after four denials and refuses further shared locks. You might see Livelocks if you use partitioning.


Q) Which DMV (Dynamic Management View) returns aggregate performance statistics for cached query plans?

A) sys.dm_exec_query_stats


Q) Which system database stores SQL logins?

A) master


Q) Which system database stores SQL Server jobs?

A) msdb, BTW the msdb database is also used for the SQL server agent, service Broker, Database Mail, Log shipping and more.


Q) What system databases are in SQL Server and what do they do?

A) Master – Records all the system-level information for an instance of SQL Server incl Logins
Msdb – Used by the SQL Server Agent for scheduling alerts and jobs
Model – Template database for all user defined databases, incl tempdb
Resource – Is a read-only database that contains SQL Server system objects. System objects are physically in the Resource database, but logically appear in the sys schema of every database.
TempDB – Stores temporary tables, table variables, cursors, work tables etc, used for index creation.
Distribution – Used for SQL Server replication


Q) What are the steps to restore a master database?

A) You need to stop the SQL Server service and restart it in single user mode from a command prompt (sqlservr.exe -c -m -s <servername>). Next open up a second command prompt and connect to the sql server using SQLCMD. Then run the command to restore the master database from your backup using the REPLACE option. Once finished the Server will shut itself down, now simply boot it back up in normal mode and you should be good to go.


Q) Which stored procedure is used to find blocking processes?

A) sp_who2.    sp_WhoIsActive is an even better proc for getting info on active spid’s.


Q) What are the 3 main backup types and which one would you use to get to a point in time?

A) Full, Diff and Transaction log. To get to a point in time you’d first restore the full backup then diff (if used) and finally transactional log backups to the point in time required.


Q) How to take a backup without breaking the backup chain/sequence?

A) You can do a backup database with COPY_ONLY, this will allow you to carry on taking your Full, Diff and Log backups without any interruption.


Q) What is the command to do a point in time recovery?

A) Recover full, diff (if used), and log files up to and including the time required. For the last one you specify STOPAT and the time, e.g  RESTORE LOG AdventureWorks2008R2 FROM AdventureWorksBackups  WITH NORECOVERY, STOPAT = ‘Apr 15, 2020 12:00 AM’. Then restore the database with recovery to bring it online.


Q) How does SQL Server know what to back up in a differential backup?

A) It is done at the extent level whereby a full backup will reset the flags on all extents so that they are turned off. If that extent is later modified then the flag is set to on so that a diff backup knows to back it up. The flags only get set back to off at the next full backup so differential backups grow in size over time as more and more data is changed. Differential backups can be run on all recovery models; Full, Bulk-logged and Simple.


Q) In a data warehousing environment what effect might your backup strategy have on performance and what considerations might you take when deciding on a strategy?

A) In a typical Data Warehouse you will only have one single batch load of data per day and the majority of the data is static which means that you don’t need to worry so much about backing up the log, in fact the recommendation is to run the database in simple mode and perform the backup just before the ETL data load. For a relatively small DWH then you can just do full backups every night. For a medium size DWH perform weekly or monthly Full backups and Daily differential backups. For a Large DWH you need to start to think about partitioning and moving static tables and data into read-only partitions and then you do nightly full filegroup backups only on the read/write file groups.


Q) To restart a log backup chain in 2008 do you take a full backup?

A) No, a differential backup will also restart the backup chain so that you can continue taking log backups.


Q) Which High Availability solution includes backup, copy and restore jobs?

A) Log Shipping


Q) What is Log Shipping?

A) Log Shipping is a way of automatically backing up a database and transaction log on a primary database and applying them to a standby database for recovery. It is a warm standby solution.


Q) Which version of SQL Server has log shipping?

A) Log shipping is available in the Standard, Web and Enterprise editions (all editions except Express); it was an Enterprise-only feature back in SQL Server 2000.


Q) What are the main types of SQL Server replication?

A) Merge, Snapshot and Transactional.


Q) Which agents are used in transactional replication?

A) Snapshot, Log Reader and Distribution Agent


Q) How exactly are replicated transactions applied to the subscriber?

A) First the snapshot agent prepares a snapshot file containing schema and data of published tables and objects, stores the files in the snapshot folder and records sync jobs in the distribution database of the distributor. The Log Reader Agent monitors the transaction log for the replicated database and copies any relevant transactions to the distribution database. The Distribution Agent copies the initial snapshot files and subsequent transactions held in the distribution database to Subscribers.


Q) Is it possible to insert records without merge/insert triggers executing?

A) Yes you can bulk load the data using either bcp or bulk insert.


Q) Name the SQL Server transaction isolation levels?

A) READ UNCOMMITTED “Dirty reads”, READ COMMITTED(Default), REPEATABLE READ, SNAPSHOT and SERIALIZABLE.


Q) When might you use a READPAST table hint?

A) You might use this to alleviate locking issues as it permits the server to skip past write-locked rows.


Q) What are the good and bad things about a NOLOCK table hint?

A) The good is that it prevents shared locks from affecting rows returned in your queries, this increases performance and concurrency. The bad is that due to ignoring any locks some dirty/uncommitted data could potentially be read which can lead to inconsistent data.


Q) What is normalisation?

A) Database normalisation is the process of organising the fields and tables of a relational database to minimise redundancy and dependency, it usually involves dividing large tables into smaller tables and defining relationships between them.


Q) How would you give a user access to run a stored procedure?

A) You would grant execute on the stored proc to the user.


Q) What are the best practices if setting up security for a new database?

A) Install only the required components, try to only use Windows authentication mode, Disable the sa account and rename it, Enforce a strong password policy, Change the default SQL Server ports, Remove BUILTIN\Administrators group from SQL Server Logins, Enable logging of successful and failed login attempts, Hide the SQL Server instance or disable the SQL Server Browser service.


Q) How would you separate Windows administration and SQL Server Administration.

A) Deny access to “Builtin\Administrators”


Q) How many Administrative (server) roles are there in SQL Server?

A) Eight roles, dbcreator, diskadmin, bulkadmin, setupadmin, processadmin, securityadmin, serveradmin and sysadmin.


Q) What is Role-Based Access Control?

A) Role-Based Access Control (RBAC) is a security model where permissions are associated with roles, and users are assigned to these roles, thus gaining the role’s permissions. In SQL Server, there are server-level roles and database-level roles.


Q) Why would you use Statistics IO?

A) Statistics IO causes SQL Server to display statistical information regarding the amount of disk activity and CPU generated by a query, it’s a good tool for troubleshooting a query.


Q) How would you prevent a word from being included in a full text catalog?

A) You would add that word (stop word) to the existing Stoplist or create a new stoplist.


Q) How do you check what jobs are running on a server?

A) Jobs activity monitor is the easiest way.


Q) How do you access the instance activity monitor?

A) On the SSMS toolbar click Activity Monitor (Ctrl-Alt-A); in older versions it was found under the Management tab of Object Explorer.


Q) What is an allocation bottleneck?

A) Allocation bottlenecks refer to contention in the system pages that store allocation structures, PFS (Page Free Space), GAM (Global Allocation Map) and SGAM (Shared Global Allocation Map). The bottleneck will typically arise in tempdb and can be eased by using multiple data files in tempdb and maybe also by enabling trace flag T1118.


Q) What are the steps needed to connect to a SQL Server instance on a fixed port from SSMS?

A) You can specify this in advanced properties in the connection window. For sqlcmd you can specify the address as <IP address>,port.


Q) Is there anyway of simplifying the connection so that you don’t have to type it in each time?

A) Yes you can add an alias to it via SSCM SQL Client configuration. You could also just add it as a Registered server in SSMS?


Q) How do you change the port in an instance of Reporting services and where is this information stored?

A) The easiest way is to do this with Reporting Services Configuration Manager and you can change the port number in Web service configuration and Report Manager-Advanced.


Q) If you use a checksum on a backup does this remove the need to run a dbcc checkdb on the database?

A) No, a checksum only checks the integrity of the backed up data, although it would possibly give a bit of a heads up to any issues in the database.


Q) What is a merge statement?

A) A merge statement is a great way of simplifying SQL code by allowing you to do a conditional insert, delete and update in one statement as opposed to 3 separate statements.


Q) What are the different types of data compression in a sql server?

A) Row level and Page level compression, there’s also backup compression.


Q) Describe how database compression works?

A) Row-level data compression is essentially turning fixed length data types into variable length data types, freeing up empty space. It also has the ability to ignore zero and null values, it all means you can fit more rows in a data page. Page-level data compression is row-level compression plus prefix and dictionary compression to save on repeated values to give better total compression.


Q) In a datawarehouse environment what effect might you expect page and row compression to have on performance?

A) Much less disk space required, much less physical IO in queries so quicker queries, as long as you have enough CPU resources for compressing/decompressing the data.


Q) What permissions are required to display execution plans from the profiler?

A) Users must be granted ALTER TRACE permission.


Q) What permissions are required to display execution plans from dmv’s or stored proc?

A) They must have relevant permissions on the dmv or stored proc AND be granted SHOWPLAN permission on the database.


Q) What are the minimum rights in 2008 to run a truncate statement?

A) The minimum permission required is ALTER on the table_name.


Q) What is a filtered index?

A) It is an index with a where clause in it.


Q) How does sparse file initialisation improve database creation and expansion?

A) It claims the disk space without first filling that space with zeros, so database file creation and expansion are much quicker (this is instant file initialisation, which requires the Perform Volume Maintenance Tasks privilege).


Q) Which user operations are non logged?

A) Strictly speaking these are minimally logged rather than completely non-logged; SELECT INTO and bulk loads (bcp / BULK INSERT) are the main ones.


Q) What dbcc command would you run post upgrade?

A) It’s a good idea to run DBCC UPDATEUSAGE(0) to update page and row counts. Probably also a good idea to run dbcc checkdb.


Q) What is table partitioning and what are the benefits?

A) It’s a way of splitting a table into more manageable chunks which usually also improves query performance.  It also makes it more scalable and reduces blocking.


Q) Explain the changes in index maintenance strategies since SQL 2000?

A) In later versions of SQL Server there is more information on duplicate and unused indexes in for example DMV’s. There is also now the ability to do resumable online index rebuilds?


Q) What options are available to monitor user activity in SQL Server?

A) Can use triggers or perhaps a server side profiler trace. Can also enable auditing of logins.


Q) What is SQL Injection and why is it a problem?

A) It is a way of attacking a SQL Server via the front-end application or webpage by inserting SQL commands to an improperly designed and secured form.


Q) How should you avoid SQL Injection?

A) Constrain and sanitize the input data by checking for good data by type, length, format and range. Avoid disclosing database error information. Use type-safe parameters for data access. Most importantly perhaps is to use an account that has restricted permissions in the database.


Q) What is the difference between set statistics profile on and set showplan xml on?

A) The main difference is that set showplan_xml on doesn’t actually execute the query, whilst the other one does. Both return the query plan info.


Q) What are certificates and what are they used for?

A) Certificates are used to help secure connections, in database mirroring, to sign packages and other objects or to encrypt data or connections.


Q) What considerations must be given to recovery planning if you encrypt data?

A) You must ensure that you also back up any associated certificates and keys by using the BACKUP CERTIFICATE command.


Q) What methods are available for encrypting data in SQL Server?

A) There is Cell-level encryption which includes certificates, symmetric and asymmetric keys and gives a lot of granularity. If you want to encrypt the whole database you can use Transparent Data Encryption TDE. There is also Windows File-Level encryption and Transport-Level encryption like SSL.


Q) How would you implement SQL Server data encryption so that privileged users can’t decrypt it?

A) You can do this using cell-level encryption with only the application layer knowing the key.


Q) What alternatives to backup/restore data recovery are there?

A) Failover clustering, Database Mirroring, Always On, Log shipping and Transactional replication.


Q) Always-On Availability Group read-only copies, how to set up routing and what to add in the connection string?

A) SQL Server 2016 supports up to eight replicas. By default, the secondary replicas do not allow any workload, but they can be configured as an active readable secondary to allow read-only access to all secondary databases. Setting the readable secondary as Read-Intent Only will allow the secondary server to serve the read-only workload only if the connection string of the application contains the ApplicationIntent=ReadOnly parameter. Connections with ApplicationIntent value equal to ReadOnly can be enforced to secondary replicas using a new Always On Availability Group feature called Read-Only Routing. In this way, the queries that will perform read-only processes that are directed to the Always On Availability Group listener will be redirected to the secondary replicas instead of the primary replica. In SQL Server 2012 the read-only workload redirection process will redirect the queries specifically to the first secondary replica defined in the routing list, but from 2016 onwards you can add a load_balance list so that there is proper load balancing and all the secondary replicas can be used.
https://www.sqlshack.com/how-to-configure-read-only-routing-for-an-availability-group-in-sql-server-2016/


Q) What does set MaxDOP(2) do?

A) MAXDOP(n) actually controls the number of CPU cores that can be used by the query processor for parallel processing.


Q) What is most appropriate action to take when trying to resolve a bookmark lookup in a query plan? Create a covered index, force a recompile or rebuild the index?

A) Creating a Covered Index including the lookup column is the best option so as to avoid the costly bookmark lookup.


Q) What is the difference between a where clause and a having clause?

A) A having clause behaves like a where clause within a group by statement


Q) What is the difference between a subquery and a derived table query?

A) A subquery is a SELECT statement that is nested within another statement, it’s like having a query within another query and is used in the Where clause. A derived table is basically a subquery, except it is always in the FROM clause of a SQL statement.


Q) What is collation?

A) A collation encodes the rules governing the proper use of characters for either a language, such as Greek or Polish, or an alphabet, such as Latin1_General. It defines case sensitivity, sort order etc.


Q) What is the purpose of CTE?

A) A common table expression (CTE) can be thought of as a temporary result set that is defined within the execution scope of a single SELECT, INSERT, UPDATE, DELETE, or CREATE VIEW statement. CTEs can be thought of as alternatives to derived tables (subquery), views, and inline user-defined functions.
The main use for a CTE is in recursive queries and where you need to perform an operation based on a few columns which you select in advance. It’s a substitute for creating a view.


Q) What is the purpose of an execution plan?

A) An execution plan is the result of the query optimizer’s attempt to calculate the most efficient way to implement the T-SQL query you submitted.


Q) What are the different types of temporary table?

A) There are 2 types. A local temporary table, #table_name, exists only for the duration of a user session or the procedure that created the temporary table, it can be accessed only by that user. A global temporary table, ##table_name, also exists for the duration of a user session or the procedure that created the table, however, multiple users can access a global temporary table.


Q) What’s the difference between a Temp Table and a Table Variable in SQL Server?

A) Temporary tables are physical tables created for a particular session; they are automatically deleted when the session terminates. Table variables are stored both in memory and the disk in the tempdb. A temporary tables structure can be altered after creation, a Table variable’s cannot. Table variables can be used in user-defined functions, Temporary tables can’t. Table variables do not allow creation of indexes on them, Temporary Table do. Table variables do not use a read lock, Temporary Tables do.


Q) What is a trigger?

A) A trigger is a special kind of a stored procedure that executes in response to certain action on the table like insert, delete or update of data.


Q) What different types of join can you use in TSQL?

A) The main types are inner join, outer join and cross join, but you can also have a Self-join.


Q) What are the Join operators available in SQL Server.

A) The main join operators are;
Merge It is the most effective of all the join operators. However, it requires all input data to be sorted by the join columns.
Adaptive The optimizer defines a threshold number of rows and then chooses between a Nested Loop or Hash Match join operator at runtime.
Hash Match Creates a hash table (in memory) for required columns for each row then creates a hash for the second table and finds matches on each row. It is very expensive and requires a lot of memory resources.Only really effective for joining very large unsorted (or at least sorted in different ways) datasets/tables.
Nested loop Performs a search on the inner (smaller) table for each row of the outer (larger) table. Less expensive than a Hash Match and ideal for small row inputs, it requires less I/O with fewer rows having to be compared.


Q) Which one of these, Nested Loop, merge join or hash join do you use in query execution?

A) All 3 are used based on the type of tables being joined, merge is the most efficient then nested loop join, both need sorted data. Hash join is good on unsorted tables. SQL Server chooses Nested Loop join when one input set is small and other one is fairly large and indexed on its join column. If two inputs are not small but sorted on the join columns; a merge join is the fastest operation. It requires both tables to be sorted on the merge column. The hash join can efficiently handle large, unsorted and nonindexed inputs. The optimiser chooses the optimal join to use based on a costing of number of io’s and cpu required and this is affected by the size of the tables and indexes etc.


Q) Name 3 things you should monitor on SQL Server.

A) You should monitor the error log for any errors. You should monitor the jobs to make sure they all run as expected/succeed. You should monitor the size of the databases. Another thing would be to monitor the overall load on the server, mostly in terms of CPU but also Memory and Network etc.


Q) SQL Server treats NULL as a value. What does this mean?

A) A value of NULL indicates that the value is unknown. No two NULL values are equal but despite this a Unique Index will only allow one Null value.


Q) How do you manage the definition of a database?

A) You can select and alter many options/definitions of a database, you would either do this via the SQL Server Management Studio but you can also use the alter database command and various other T-SQL commands. You can also set things like collation on the database level as well as recovery model, owner etc.


Q) What is the difference between Simple, Bulk-Logged and Full logging modes?

A) Simple mode does not allow log backups, it automatically reclaims log space. It can’t be used with Log Shipping, Always On, Database Mirroring or point in time recovery.   Full mode requires log backups and allows all the functions which Simple mode doesn’t including point in time recovery with no data loss.  Bulk mode also requires log backups, its similar to Full mode but permits high-performance bulk copy operations whilst reducing log space usage by using minimal logging for most bulk operations. Bulk mode does not allow point in time recovery but it’s useful if the database is subjected to periodic bulk operations, e.g a Data Warehouse environment.


Q) What can you do with a column NVARCHAR(450) that you can’t do with a column NVARCHAR(451)?

A) You can create an index on nvarchar(450) but not on nvarchar(451). This is because an index key in SQL Server is limited to 900 bytes (for a clustered index; from SQL Server 2016 a nonclustered index key can be up to 1,700 bytes) and nvarchar uses 2 bytes per character, so nvarchar(450) is exactly 900 bytes.


Q) Can DDL and DML statements be mixed in a transaction?

A) Yes, they frequently are mixed in stored procedures but it’s a good idea to do all the DDL operations (Declare, create table etc) first and then do the DML (Set, select, Insert etc), to avoid any costly recompiles. Not all DDL’s can be mixed though.


Q) What is a schema, for example dbo.MyTable – what is the “dbo” part?

A) In the above example dbo is the schema/owner. A schema is a distinct namespace to facilitate the separation, management, and ownership of database objects.


Q) What is the difference between Recovery Point Objective and Recovery Time Objective?

A) The main difference is in their purpose. Recovery Point Objective is focussed on data and a company’s loss tolerance in relation to the data.  Recovery Time Objective is the target time a company sets for recovery of their IT and Business activities after a disaster has struck.


Q) If the Average Page Life Expectancy is 300 seconds, and you have a machine with 128GB of RAM – what does this mean?

A) The old rule of thumb says 300 seconds is an acceptable minimum, but that guidance dates from servers with only a few GB of RAM. On a machine with 128GB of RAM, a PLE of 300 means the entire buffer pool is effectively churned every 5 minutes — an enormous amount of I/O — so it would actually suggest memory pressure. Specifically, PLE means that on average a page will stay in the buffer pool for that many seconds without any references.


Q) What is the cxpacket wait type?

A) The SQL Server CXPACKET wait type is a result of parallel query execution and indicates that a session is waiting for the synchronization of threads involved in the parallel process to complete. If waiting is excessive and cannot be reduced by tuning the query (such as adding indexes), consider adjusting the cost threshold for parallelism or lowering the degree of parallelism.


Q) What is SOS Scheduler yield?

A) SQL Server has multiple threads, when any thread yields itself to another thread, it creates this wait type. If this wait time is very high then it indicates that the CPU is under pressure. You can fix this by giving more CPU to the system or by optimising poorly written cpu hungry queries.


Q) What is a resource semaphore wait type?

A) Simply put it is when a query is waiting to grab some memory to be able to run, it is caused primarily by not having  enough memory available for the sql server or by queries themselves being inefficient and requiring too much memory to run.


Q) What would high wait times in CXPacket indicate in a datawarehouse environment?

A) CXPacket waits represent the time needed for a parallel plan to be executed, as long as it is less than 50% of total waits it shouldn’t be considered as a problem but rather an indicator of parallelism working. High CXPacket values could be an indication that the Cost Threshold for Parallelism is set to low which causes almost all queries to be parallel which for small queries is inefficient. The default value of 5 is almost always too low and should be increased to at least 50 with further tuning advised to find the optimal value. It might also be worth looking at MAXDOP and setting that to less than the total number of cores on the underlying Server. High CXPacket Wait times can also be an indication of uneven data distributions through fragmented indexes or out of date Stats. DataWarehouses benefit a lot from using parallelism so the more cores the better but only for expensive queries.


Q) How would you use wait stats to troubleshoot a performance issue in SQL Server

A) Microsoft added a new Dynamic Management View (DMV), sys.dm_exec_session_wait_stats, in SQL Server 2016. It provides information about wait statistics for an active session. Extended events can also be used to gather wait stats. Wait stats will help pinpoint an issue, for example very high cxpacket waits would indicate badly tuned parallelism where perhaps too many queries run in parallel, whereas SOS_Scheduler_yield would indicate CPU pressure.


Q) What is the difference between a Page latch and a Page IO latch?

A) Page latches are held on the buffer memory, if they are high then it could be an indication of contention in modifying the same page. PageIOLatches specifically refer to IO latch waits, an increase in this value indicates an issue with the IO subsystem, i.e. it takes too long to load pages into memory from disk.


Q) What are the most common wait types you’ll see in SQL Server?

A) ASYNC_NETWORK_IO – Basically just SQL Server waiting for a response from the application, nothing you can do about it.
CXPACKET – Explained elsewhere, to do with parallelism
LCK* – One query is holding locks on an object, and another query is waiting to get locks on that same object. Can be tuned with better indexes or perhaps you have too many indexes.
PAGEIOLATCH_* – SQL Server is waiting to read data pages from storage. These pages weren’t cached in memory, so we have to get them from disk, can be tuned with better indexes or adding more memory or getting faster disks.
SOS_SCHEDULER_YIELD – The SQL Operating System is waiting for a CPU scheduler to yield more time. Can be related to inefficient parallelism setup.
WRITELOG – Users are doing delete/update/insert (DUI) operations, and SQL Server has to write those to the transaction log file before telling the end users that their transaction is committed, can be eased by using delayed durability if you can risk losing some data.


Q) A message in the error log saying that a significant amount of memory has been paged out, why is this happening and how do you prevent it?

A) This would commonly occur when the underlying OS or other applications on the Windows server are being starved of memory resources by the SQL Server, the solution is to ensure you always leave an adequate amount of Memory to the OS, e.g. on an 8GB Windows server leave at least 4 GB to OS.


Q) Why would you disable the write through cache in a RAID controller on a Database Server?

A) If you need to 100% ensure all data is written to disk when committed then you should disable write through caching otherwise if the machine is suddenly shut down you could end up with inconsistent data which the SQL Server thinks it has been written to disk but which in reality was only written to the cache memory.


Q) How would you allocate a mix of RAID 10 and RAID 5 on a SQL Server?

A) RAID 10 is far superior especially in write-heavy scenarios whereas if you have databases or portions of a database which are predominantly read-only then you can get away with using RAID5.


Q) What’s a “numbers table”, and why would you use it?

A) A numbers table serves as a static sequence, it will have a single column, with consecutive numbers from 0 or 1 up to a predefined upper boundary.  They are useful for string manipulation, populating test tables with lots of data, eliminating cursor logic and lots of other tasks which would be hard to do without them.


Q) Does SQL Server have native support for UTF8?

A) It does since SQL Server 2019 (via UTF-8 enabled collations); one workaround in earlier versions was to store the data as UTF-16 (nvarchar).


Q) What is lock pages in memory?

A) Lock Pages in Memory (LPIM) is a Windows policy, it locks the SQL memory in physical memory and prevents it being paged out to improve the system performance and can be very helpful for SQL Server performance. Less important now than it used to be in earlier versions.


Q) What is parameter sniffing?

A) SQL Server uses a process called parameter sniffing when it executes stored procedures that have parameters. When the procedure is compiled or recompiled, the value passed into the parameter is evaluated and used to create the execution plan. Subsequent queries then use this plan which may not fit the new parameters optimally.


Q) What are temporal tables in SQL Server

A) Temporal tables (also known as system-versioned temporal tables), are a type of user table designed to keep a full history of data changes, allowing easy point-in-time analysis. Every temporal table has two explicitly defined date columns, referred to as period columns, they are used exclusively by the system to record the period of validity for each row, whenever that row is modified.  It uses a current table and a (mirrored schema) historical table which automatically stores the previous version of the row each time it gets updated or deleted.

Temporal tables are useful for Auditing data changes, Reconstruction of data to a specific point in time, Calculating trends over time and Maintaining a slowly changing dimension for decision support apps.


Q) What is the difference between Change tracking and CDC (Change Data Capture) in SQL Server

A) Change tracking is light weight tracking used primarily for application synchronisation tasks where the app only needs to know that a change has occurred and what the most recent data is for each database or table, an advantage is that it is synchronous.
CDC maintains a history of row changes, including the actual data that was changed, it does this by asynchronously reading the database’s transaction log via SQL Agent jobs to detect when a change has occurred and storing that information in the tracked table’s associated change table.


Q) What are the benefits of table partitioning along with CDC in large data sets

A) Table partitioning makes good sense in large data sets where you can split the table data into partitions, the majority perhaps being read-only and only a small part needing to be read/write, it makes it easier to back up data and cuts down on contention etc. In terms of table partitioning together with CDC and replication, care must be taken with partition switching and you must ensure that all tables that are involved in partition switching exist at the Publisher and Subscriber, and make sure that the table and partition definitions are the same. Before executing a split or merge operation on a replicated or CDC enabled table you must ensure that the partition in question does not have any pending replicated commands. You should also ensure that no DML operations are executed on the partition during the split and merge operations.


Q) What is batch mode processing in SQL Server?

A) Batch mode execution used to only be available for Columnstore tables, but from SQL Server 2019 onwards there is also “batch mode on rowstore”. It’s a query processing method, and the advantage of this query processing method is to handle multiple rows at a time, chunks of approx. 900 rather than 1 by 1. This approach gains big performance enhancements when a query performs aggregation, sorts, and group-by operations on a large amount of data. It uses significantly less cpu and therefore execution time. It is chosen automatically by the optimiser if deemed beneficial.


Q) What is uniform extent versus mixed extent?

A) Uniform extents are owned by a single object; all eight pages in the extent can only be used by the owning object.
Mixed extents are shared by up to eight objects. Each of the eight pages in the extent can be owned by a different object.
Uniform extents are obviously better in terms of performance. Starting with SQL Server 2016, the default for most allocations in a user database, and tempdb, is to use uniform extents.


Q) What is a federated database?

A) A federation is a group of SQL Servers, typically in Azure, that cooperate to share the processing load of a system. Federated database servers let you scale out a set of servers to support the processing needs of large systems and websites.


Q) What is the difference between the file extensions .mdf and .ndf ?

A) .mdf signifies a primary database file whilst .ndf is a secondary database file.


Q) What is SET QUERY_GOVERNOR_COST_LIMIT used for?

A) For the current session it sets the maximum cost limit of time it will allow a query to execute. Any query cost greater than the maximum value will not be allowed to execute.


Q) What to do if a SQL Server patch fails and the SQL Server won’t come up?

A) The general first step is to reboot the Windows server and try to run the patch again, if there is still an issue then you need to look at the patch log files (%programfiles%\Microsoft SQL Server\nnn\Setup Bootstrap\Log) and take corrective action. You could also try to uninstall the patch. Worst case scenario is to rebuild the SQL Server and restore from backups, but usually the reboot and redeploy works. Another handy trick is to try to start the SQL Server using traceflag 902 then correct/remove whatever might be blocking the patch (e.g SSISDB), which can then be re-added later.




Scenario based questions:

Q) A User can’t connect to a database, what steps would you take to diagnose the issue?


Q) A User calls to inform that an application is running slow, steps to resolve?


Q) Users call again to say it is running slow after an overnight batch, what could be the reasons?


Q) How would you manage a release deployment?


Q) How would you manage the patching of lots of servers?




SSRS DevOps Questions

Q) What is a parameterised report (SSRS)?

A) A parameterized report uses input values to complete report or data processing. Parameters are used in dataset queries to select report data, to filter the result set that the query returns, or to set layout properties used for showing and hiding parts of a report.


Q) What’s the purpose of a dataset in a report?

A) A dataset is how you add data to a report, it defines the data source and what you want to retrieve from it. A dataset contains the information that is needed to retrieve a specific set of data from a data source.


Q) Where would you go to access a reporting services report?

A) You would go to Report Manager ?????


Q) If you wanted a custom function that for example took in a number and returned the name of a shape, how would you put that into a report?

A) You would create this in Report Builder, Report Properties and code.


Q) How would you deploy a reporting services report?

A) You would deploy it via Report Manager, (Native mode deployment). You can also deploy it from a Visual Studio project or you could publish the report to Sharepoint or within custom applications.


Q) How do you develop the report?

A) Report Builder or Visual Studio with BI data tools


Q) How would you create a conditional format in an SSRS report?

A) In report Builder you would add an expression into the relevant field’s properties




SSAS DevOps Questions

Q) What is CSV file?

A) A CSV is a comma separated values file, which allows data to be saved in a table structured format.


Q) What is xml file?

A) XML is a file extension for an Extensible Markup Language (XML) file format used to create common information formats and share both the format and the data on the World Wide Web, intranets, and elsewhere using standard ASCII text.


Q) What are fact and dimension table for and what is the difference between Fact and Dimension tables?

A) The fact table contains business facts (or measures), and foreign keys which refer to candidate keys (normally primary keys) in the dimension tables. Contrary to fact tables, dimension tables contain descriptive attributes (or fields) that are typically textual fields (or discrete numbers that behave like text).


Q) What is the difference in Primary Keys between a natural key and a surrogate key?

A) A natural key is a primary key designed around an existing data column(s). A surrogate key is made up, for example an identity column.


Q) In relation to Kimball methodology, explain the following types of Facts table, Transaction, Periodic snapshot and Accumulating snapshot?

A) These are all types of grain. Transaction grain corresponds to a measurement taken at a single instant eg a grocery shop beep, they can be huge and unpredictably sparse. Periodic snapshots correspond to a predefined span of time, often a financial reporting period. Can also be large but is more predictably dense. Accumulating snapshots correspond to a predictable process that has a well defined beginning and end. These tables are typically smaller as they tend to overwrite data as they process.


Q) What are the types of relation models in SSAS?

A) Tabular and Multidimensional. Under multidimensional you have ROLAP and MOLAP


Q) In SSAS what is XMLA?

A) XML for Analysis (XMLA) is a Simple Object Access Protocol (SOAP)-based XML protocol, designed specifically for universal data access to any standard multidimensional data source residing on the Web.


Q) What types of keys are used to design the facts and dimensions in SSAS cube?

A) Primary(Candidate) and Foreign Keys are the main types but also alternate keys.


Q) What is slowly changing dimension and explain the two types?

A) The term slowly changing dimensions is the variation in dimensional attributes over time. The two main types are overwriting history and preserving history. There is also another type called preserving a version of history.


Q) What is the purpose of audit tables?

A) Audit Tables are used to track transactions against a particular table or tables. Audit Tables are typically filled through the use of Database Triggers


Q) A question related Kimball methodology for datawarehouse design related ?????

A) The dimensional model is built on a star-like schema, with dimensions surrounding the fact table. To build the schema, the following design model is used: Choose the business process, Declare the grain, Identify the dimensions and finally Identify the fact.


Q) When do you need more than one measure?

A) ?? Depends what information you want to return you might want to know the sum and average of a selection of data. Also if you have multiple facts in the same model.

Sybase Interview Questions

ASE Dataserver:

Q) What are the new features of ASE15?

A) ASE15 has a brand new optimizer which gives better performance and is self-tuning. It also introduced smart table/index partitions and on-disk encryption.  It has Advanced System Metrics(Query Metrics), auto updating stats and advanced XML technologies. In addition there is now row level locking on most system catalogs. Showplan has been enhanced to make it easier to read the various steps. ASE15 requires more memory to run because of new in-memory sorting and grouping algorithms and also more disk space because of some new stored procs and changes in the structures of some of the system catalogs.

ASE15.5 Introduced in-memory databases and Relaxed Durability databases and also improvements in backup compression.


Q) What are the major features introduced in ASE 15.7?

A) The major new feature is Threaded kernel mode, it also introduced improvements to compression, specifically in-database compression and also the ability to shrink a transaction log plus the ability to recreate non-materialised non-null columns, i.e you don’t need to rewrite the entire table anymore which required lots of spare space in the database. Also new security and diagnostic features plus performance enhancements with regards to parallelism.


Q) New features in ASE16?

A) Partition locking, Full Database Encryption, Index Compression, Configuration History Tracking, Query Plan Optimisation with Star Joins and Dynamic Thread Assignment. Lock Management Enhancements.


Q) New features in ASE16 SP02

A) Compiled Queries, Transactional Memory, Lockless Data Cache, Latchfree Btree, Non Volatile Cache Management(Using SSD) which all lead to Latency and Throughput benefits.


Q) New features in ASE16 SP03

A) In-memory row store (IMRS) with data row caching (DRC), New indexing schemes like Hash Cached B-tree (HCB) indexing, Multi-version concurrency control (MVCC). All of these new features lead potentially to a 40x better performance compared to older versions of ASE! Also a new tool – Workload Profiler – enables ASE to automatically recommend configuration changes to move from 100% page-based store to having hot rows in memory that leads to significant improvement in performance


Q) What are the main advantages and disadvantages of using identity columns?

A) The advantage is in the ordering of the table and creation of unique rows, disadvantage is identity gaps.


Q) Can you update an identity column?

A) no – can define the starting number only, and insert


Q) How to bcp out a subset of rows from a large table?

A) Create a view with the subset and bcp out via it.


Q) What does set forceplan on do?

A) Forces the join order for queries to be the order in which the tables appear in the from clause.


Q) How do you find out which process has filled up tempdb?

A) select * from master..syslogshold but you will need to create an additional temporary database and bind it to your login first, if you don’t already have one.


Q) What would you do to improve the performance of tempdb following an install?

A) tempdb is initially installed as 2MB on the master device, so you should extend tempdb onto a new device, or devices, and then update the segmap for the tempdb fragment on the master device to 0.


Q) What are the options for doing an upgrade to a Sybase server?

A) You can do a Sybase upgrade but its safer to create a new server and migrate over the databases/logins etc.


Q) What is the difference between an MRU and an LRU caching strategy?

A) It stands for most recently used and least recently used and defines which records are swapped out first, the default is LRU i.e. the pages least recently used get flushed out when more space is needed. MRU is referred to as fetch and discard and is ideal for queries where a page is only needed once by the query for example table scans on heap tables and also situations where you have more than one table in a join query.


Q) What does the term covered query mean?

A) It is if the columns specified in a query (columns to be returned plus columns in where clauses) are completely satisfied by an index, that index is then called a covered index. It means that the query engine only needs to look up the table once which leads to a much quicker query.


Q) In Adaptive Server explain the difference between a character(RAW) and a block device(Filesystem) and when they are typically used or could be used

A) A character or un-cooked/raw device writes directly to the disk and guarantees transactions are written to the disk immediately. A block or cooked device does not write to the disk immediately, which can cause the database to lose its integrity should there be a media failure. Character devices are normally used for live databases. Using block devices in Tempdb can in some circumstances improve performance for applications that create a lot of temporary tables.

This is becoming less and less relevant as modern OS’s allow the use of DirectIO to a filesystem device which is almost the same as using a raw device.


Q) Briefly explain what the difference is between a transient and hard 605 error.

A) A transient error:

The Operating system notifies Sybase Server that an I/O operation is complete when it is not. Any time buffer information about a page is different from the disk information about the page (and the disk has the correct information).  ~ Normally a dbcc checkdb and dbcc checkalloc should produce no more 605 errors.

A hard error:

Normally hardware related; it can be caused by incorrectly formatted disks (overlapping partitions) or disk media failure.


Q) When you try to start the server you get a ‘cannot allocate shared memory’ error – why and what do you do?

A) Probably there is still some shared memory allocated, which needs to be released, it can be identified with shmmax in /etc/system


Q) What are the Sybase isolation levels?

A) Sybase ASE has 4 isolation levels: level 0 dirty reads (read uncommitted), level 1 read committed (the default), level 2 repeatable read and level 3 serializable (holdlock), which is the ANSI default level. The higher the level the greater the effect on locking.


Q) What type of indexes are available in Sybase?

A) Clustered / non-clustered


Q) How do you display index size?

A) sp_spaceused, 1


Q) What type of index is good for range queries with joins?

A) non-clustered


Q) How much space is required for a clustered index build?

A) 120%


Q) What is index covering?

A) Where all columns requested are present in the index, therefore the data pages do not need to be accessed.


Q) How would you defragment a heap table?

A) You could either create a clustered index then drop the index afterwards or bcp all the data out, delete the data from the table and then bcp it back in again.


Q) Explain the difference between slow and fast BCP and how to apply this.

A) Slow BCP leaves indexes in place and is the default. Fast BCP involves dropping the indexes/triggers for the table in question. In Fast BCP the indexes are recreated after the data has been loaded in. The BCP –A flag can be used to increase the default Network packet size. This can increase the speed of a BCP, however the under-lying UNIX o/s needs to be changed to support the larger packet size.  In addition you would need to allocate additional network memory in the Sybase dataserver which is used by the bigger network packets.


Q) If you bcp’d into a table with rules, defaults and triggers would any of these take effect?

A) Only defaults if fields are null.


Q) What are considerations for very large databases?

A) Sybase ASE 15 vastly improved on the previous limits of 256 32GB devices. You can now have almost infinite storage space or to put it this way you can have billions of 4TB devices! The other considerations for very large database are in terms of timings for backups and maintenance tasks. It becomes much more relevant to look at compressions and also partitioning.


Q) Which databases are created on the master device by default?

A) Master, model, tempdb, sybsystemdb, (sybsyntax).


Q) How would you recover from a corrupt master database?

A) You first need to recreate the master database at the same location as before with the same size, this is done with

dataserver -d <device path and name> -w master -z <page size> -b <size in 2k blocks>

Next you can log into the new master database with the default sa login and configure the backup server.

After this it’s a simple matter of loading in the master database from backup.


Q) How to drop corrupted database?

A) dbcc dbrepair('dbname', 'dropdb')


Q) How to analyse a badly performing query?

A) First you need to isolate the query by looking in sp_who, together with sp_lock and also any long running transactions in master..syslogshold. You then need to extract the actual sql being run by using dbcc sqltext (spid) and view the showplan from sp_showplan (spid).

You can then run a trace against the spid using set tracefile ‘<path to tracefile>’ for <spid>. You can also then set showplan and set statistics io which also gets output to the tracefile.

It's also a good idea to refer to the relevant mda tables and query metrics. When you have isolated the bad sql text you can start to optimise it either by rewriting it or putting in extra indexes etc.

Another cause for sudden bad performance could be fragmentation of the table, especially if a large amount of data has suddenly been added to a table, this can be fixed by dropping and recreating the clustered index or by running a reorg rebuild command on a DOL or APL table.


Q) What variables would you always be adjusting for a production Sybase server?

A) Would need to increase the memory for the server together with procedure and data cache. Also increase number of users, number of objects and indexes. Possibly increase number of online engines and enable parallel processing


Q) How would you tell if a users transaction is rolling back?

A) dbcc traceon(3604) and dbcc stacktrace(spid), dbcc pss(0, spid) and check sysprocesses (io) is incrementing.


Q) Name some ways you can connect to Sybase using perl?

A) DBD::Sybase (via DBI) is probably the best way.


Q) In transact sql explain the purpose of the having clause and when it is typically used?

A) You use it with a group by, it’s kind of like a where clause for the group by statement


Q) An application frequently suffers from deadlocks, suggest a simple technique for the developers to reduce the deadlocks significantly?

A) Change the relevant tables to row level locking and also look to see if the stored proc is running in an opposite direction to another similar stored procedure. 


Q) There is a high level of blocking occurring on a Sybase server in production, list some of the likely causes and suggest ways of addressing the problem?

A) The main cause is probably a badly written query which is keeping locks open too long, but it could also be due to a very busy server with lots of processes accessing the same table all at the same time. Changing the tables to Row-level locking is a quick win, another solution is to optimise the stored procs/queries and perhaps create some new indexes to help on the load.


Q) What is table partitioning and outline some situations where it can be used?

A) Table partitioning is a way of dividing up a table into chunks, it can be very useful in various situations, one is with a heap table where a partitioned table actually ends up having multiple last pages and therefore cuts down on contention. Another benefit is by cutting down on the load of a table by putting partitions on separate disks to reduce disk contention.


Q) An application is stack tracing on selects with page pointer errors in a busy production environments, the table is suspected to be the cause, outline some steps you would take to resolve this situation?

A) Run consistency checks on the table dbcc tablealloc and dbcc checktable, plus liaise with OS and storage people to check that the underlying disks are ok.


Q) Which dbcc(s) would you run on a regular basis?

A) checkalloc, checkdb, checkcatalog, checkstorage.


Q) What is the difference between checkalloc and checkdb?

A) Checkalloc – checks page allocations – (all allocated are used and all used are allocated) Checkdb – checks page chains, index sorts, index pointers


Q) Name some of the Optimization goals in ASE?

A) mixed, oltp and dss


Q) How can SQLIO be used to improve performance?

A) SQLIO is a tool used to check the performance of the SAN and can be used to improve the disk system which Sybase runs on.


Q) What command/utility can be used to check the state of the stats in a table, new in 15.x

A) You can view statistics and histograms with sp_showoptstats. You can also set the option show_missing_stats to on, the optimiser will then print info on columns with no stats.


Q) How would you migrate a database from one page size to another, e.g from 2k page size to 8k?

A) You can do this in two ways, either using the sybmigrate utility or by running ddlgen to extract all the objects and then recreating them in a new database and bcp'ing in the data.


Q) What is procedure and data cache – how are they used?

A) Procedure cache is for stored procedures. data cache for cached index and data pages.


Q) What is statement cache and how would you configure it on a Sybase server?

A) Statement cache is set on the server level and caches query plans, to save query compilation time. It is set by sp_configure “statement cache size”, size_in_2k_pages


Q) What would be the cause of the error message, “Could not allocate memory using Huge Pages”

A) It is caused by not having enough Huge pages available on the OS, you can check how many are available with cat /proc/meminfo.


Q) What are some of the other server options would you change after a server build?

A) Number of users, memory, no of locks, no of remote connections, no of devices, no of open databases.


Q) What are the four types of page locks?

A) (Logical) Shared – xact reads page and does not want the rows to change the transaction holding the lock has no intention of modifying. No other xact can modify page.
(Logical) Update – xact has read page and may modify the page later. Update locks are promoted to exclusive locks when the xact actually modifies the page. No other xact can modify page.
(Logical) Exclusive – Transaction wants to modify the page. No other xact can read or modify.
(Address) Lock on non-leaf index pages (logical on leaf nonclustered index pages and data pages)


Q) What is lock escalation?

A) When a  transaction holds 200+ page locks it will attempt escalation to table lock.


Q) Why would a transaction not escalate to table lock?

A) Another process has a page lock, multiple re-reads of the same table will look like a transaction has more than 200 – but each select could have 1 index page.


Q) What is an engine – how many would you configure – what command would you use to change this number?

A) Unix process – usually affiliated with cpu. Must have less than you have cpu’s. Usually either CPU -1 or = cpu if there are a large number (assumes single dataserver machine). sp_configure ‘max online engines’, ‘engines at startup’


Q) How do you create a server.cfg file – if yours had been deleted?

A) sp_configure ‘configuration file’,0,’restore’,”, or just change one of the config options which will generate a new file.


Q) What is fill factor – why is it important, what would be the result of using 0,1 and 100?

A) Fill factor determines how full each page is at initial build, used on index build. 0 (the default): 2 free rows are left on each index page; 100: all index pages are 100% full; 1-99: various fill levels.


Q) What is page splitting when does it happen?

A) Will get page split when page is 100% full or max-rows per-page reached. This effectively tries to split one page across two (50:50), the exception is where it is the last page – or there is a 100% fill factor.


Q) What is the sybmultbuf process, when would you see them – and how many?

A) Spawned by the backup server when dumping a db – one for each dump or dump strip.


Q) How are database fragments in sysusages linked to devices in sysdevices?

A) The vstart in sysusages falls between low and high in sysdevices, but really just do sp_helpdb and sp_helpdevice, much easier.


Q) If you have the ‘trunc log on checkpoint’ option set when will the log get truncated?

A) A manual checkpoint flushes dirty pages to disk, a system checkpoint will also truncate logs if dboption is set.


Q) If you were asked to take a dump from the production environment and load it into development what steps would you take – or problems would you look out for?

A) You would dump the database copy it across and load it, in earlier versions you would need to ensure the segmaps aligned between the databases but this is not an issue any more. Mismatched User ids and aliases could be a concern as is the size of the databases.


Q) What non-logged operation still allows you to dump and load a transaction log?

A) truncate table


Q) What should your housekeeping scripts do?

A) dbcc, dump, update stats, drop rebuild/reorg indexes.


Q) What does update stats do – why would you run it, how would you check when it had last been run?

A) Updates statistics held about tables and indexes – in particular the distribution, step and density. You’d run it to help the optimiser make better decisions and you can check when its last run you can check in table sysstatistics.


Q) Why would you rebuild indexes?

A) To defragment – save space and increase performance


Q) How would you know how fragmented a table / index is?

A) Run sp_spaceused and compare with sp_estspace


Q) How would you shrink a database?

A) Run alter database dbname off old_dev


Q) How does the server optimise queries with more than four tables?

A) Tables are costed by grouping into fours, ie abcd bcde, the best outer table is removed, and the normal process is taken. Can be changed by ordering tables biggest to smallest – or changing ‘set table count x’. – max value of 8.


Q) What is meant by reformatting?

A) Where plan is very costly – ie no indexes etc… Optimiser may decide to reformat – create work table, populate with relevant cols, create clustered index, use index.


Q) What is meant by the ‘or strategy’?

A) The OR strategy is used only when all columns are indexed, and the query could return duplicate rows. A special work table is created (dynamic index – in showplan) to get distinct values only – using row ids.


Q) When are work tables created automatically?

A) Order by, Sorts, group by, distinct, reformatting, or strategy, other worktables


Q) What is a direct update and how is it different to a deferred?

A) SAP ASE performs direct updates in a single pass, by locating the affected index and data rows, writing records for the changes to the transaction log, and making the changes to affected data and index pages. There are In-place updates, cheap direct updates and expensive direct updates, all better than deferred updates.


Q) What is required for an In-place update (best)?

A) Row cannot change its length, cannot be the key of a clustered index, no joins, not used for ref integ, no trigger on column, no replication.


Q) What causes deferred updates?

A) Update that use joins, Update to columns that use referential integrity, update to the index keys.


Q) Why run sp_recompile on a stored proc?

A) To parse and normalise the sp with new statistics.


Q) What causes a stored procedure to recompile?

A) 1. Table no longer exists with objid or dbid, 2. Has an Index that has been dropped, 3. Any new columns / rules / defaults (sysobjects.schema+1), 4. Does the db have the same crdate, 5. Has db been re-loaded.


Q) Does recompile compile both tree and plan?

A) Just the plan


Q) Which traceflags are good to use for tuning queries?

A) 302(Print trace information on index selection), 310(Show the plan as choosen by the optimizer, as well as I/O costs), 311 (Display optimizers expected I/O cost)


Other questions

Q) If a user db is in log suspend mode, what do you do ?
Q) What if there is a secondary truncation point and that is the oldest tran ?”




Replication server:

Q) What is the Save interval used for pertaining to the Stable Queue.

A) It determines how long the stable queue, DSI, will wait until it deletes information already applied to the replicate db. It is specified in the connection string via parameter save_interval (in minutes). It can be used for redundancy in the event of a replication system failure. The save interval lets the sending Replication Server re-send all messages saved during the save interval. You can use sysadmin restore_dsi_saved_segments to restore backlogged transactions.


Q) What Replication Server parameters should always be increased from the default values.

A) The 2 most important are memory_limit which sets the total memory available to the Replication server and sqt_max_cache_size which sets the limit of cache which is available to the stable queue. It’s also a good idea to cache the system tables and also consider parallel dsi threads. You would also look to adjust the settings for the rep agent, specifically the send_buffer_size and scan_batch_size.


Q) When a DSI Thread suspends due to a transactional error, what are the two methods used to force resumption of the queue.

A) Resume connection to dataserver.database skip transaction

set autocorrection on for replication_definition with replicate at data_server.database


Q) What Replication Server command shows all the configuration parameters pertaining to a Replication Server.

A) rs_configure – run in RSSD for Replication Server, i.e. from the ASE holding the RSSD database.


Q) What is the ID Server in a replication environment?

A) It is the master replication server which holds information on all the other replication servers.


Q) What happens if the ID Server goes down in a multi repserver environment.

A) It runs ok until you try and update any rep defs etc or restart any of the other replication servers.


Q) How would you find what command caused a dsi to go into suspend?

A) Check the rep server error log.


Q) If the error log indicates that dsi is down due to duplicate key error what do you do?

A) First do a sysadmin log_first_tran to output the bad transaction, it can then be viewed with rs_helpexception. If you decided it can be skipped then do a resume connection with skip_transaction.


Q) What is autocorrection?

A) It is an option set on a rep def which converts updates and inserts into a delete followed by an insert, this is useful in dev environments where you tend to sometimes get a lot of duplicate transactions.


Q) How do you determine the time of the last transaction applied to the replicate database?

A) select * from rs_lastcommit (executed in replicate database)


Q) What is the easiest way to completely replicate a database from London to Singapore and New York?

A) Use MSA, database replication definitions.


Q) When was MSA (database replication definition) introduced?

A) Version 12.6.


Q) What is replication monitor?

A) Replication Monitoring Services was introduced in version 15 and replaced the RSM server, it consists of the UAF and the Replication Manager plugin for Sybase central


Q) What are the steps to refresh, from a production database dump, a primary development database which is being replicated to a secondary development database?

A) The steps are as follows:

– Get the details of db configuration. – get sp_config_rep_agent details
– Get gen_id details – run dbcc gettrunc to get gen_id details.
– Stop rep agent with sp_stop_rep_agent
– Load database from dump.
– Configure the rep agent back as per details in step1.
– Dbcc settrunc(ltm,ignore)
– Dump tran database with truncate_only

– (Optional) Dump and load this db into replicate database(s) if you need an exact copy of the data.
– Dbcc settrunc(‘ltm’, ‘valid’)
– Dbcc settrunc(‘ltm’,’gen_id’, gen_id+1 from step 2)
– On RSSD run rs_zero_ltm data_server, database
– sp_start_rep_agent database


Q) Does MSA replication work over a WAN?

A) Yes I think so, I can’t see why not, warm standby works over WAN.


Q) Transatlantic replication New York to London one-way, what are the steps to troubleshoot it if the data is not being replicated across?

A) First step is to check the replication server error log, followed by looking at the replication queues with admin who,sqm. If you are replicating from one replication server to the next then you obviously need to check this on both sides to see where the issue is arising. You also need to check the error log of the replicant database to make sure the database is up and running. Also check that the repagent is up and running on the primary. One quick test is to see if you can connect to replicant database/repserver from the primary to test the communication is up but you would see this from the error logs as well. It could well be that the cause of the issue is a duplicate transaction so investigate that and if necessary resume connection to replicant database with skip_transaction.


Q) What is the command to see how far the repagent has read through the log on the primary?

A) dbcc gettrunc and look at secondary trunk page. Could also look at sp_help_rep_agent and also check on the queues in the replication server admin with who,sqm to see if any are lagging behind etc.


Q) What is the benefit of having 2 replication servers when replicating across 2 countries?

A) It is more efficient as the 2 replication servers talk in a more concise language than a dataserver and replication server do, for example they strip out empty begin commit trans. It also gives better resiliency against temporary network outages and cuts down on network contention.


Q) What command would you use to check which connections are up/down?

A) admin who / admin who is down


Q) What are the stages to creating a subscription?

A) define, activate, validate, check


Q) Where would you find the valid logins for the rep server?

A) rssd..rs_users


Q) What must a table have to be replicated using warm standby?

A) primary key

Replication – Set up MSA Rep

The first thing to do is to add the dataserver connection settings into the Replication server interfaces file.
You also need to add the replication interfaces file entry into the primary dataserver interfaces file

You then need to make sure the maint users are created in the primary and replicant servers, add the password details into the .sybinfo file

Next grab the passwords for sybadmin for both servers and also the rs SA and Repagent logins

Make sure that there is no repagent/secondary truncation point configured on the primary, if there is then remove them with sp_config_rep_agent sierra, "disable" and dbcc settrunc (ltm, ignore)

You should now be ready to run rs_init for both the primary and replicant

After the connections are created you need to disable the firing of any triggers on the replicant side with:
alter connection to &lt;dataserver&gt;.&lt;database&gt;
set ‘dsi_keep_triggers’ to ‘off’

Reptostandby does not need to be executed. We only replicate at the table level and not the db level. So DDL changes are not replicated.

So set any individual tables for replication in the primary database with:
sp_setreptable fnxaudittnum, true
sp_setreptable fnxauditdet, true
sp_setreptable fnxauditgrpsub, true
sp_setreptable fnxauditgrp, true
sp_setreptable fnxaudtype, true
sp_setreptable fnxaudit, true

If it is not already running Start up the Repagent on the primary and ensure it is up and running (sp_help_rep_agent) and also check in Repserver.

The repdef, subscription and any function string class scripts etc are stored in directory /sybdba/sybase/dba/replication so create them first, perhaps copy from somewhere else.

Once they are ready execute first the database repdef and function_string class script against the Repserver

Next create the subscription with dump_marker from the script above

Now we need to dump and load the database

Dumps seem to be located in directories under for example /datlib///

so just create a new subdirectory, like sierra_DS16-19, and on the dataserver issue the dump with the following:
dump database sierra to ‘/datlib/cam_sybdmp_uat01//.dmp.s1’
stripe on ‘/datlib/cam_sybdmp_uat01//.dmp.s2’
stripe on ‘/datlib/cam_sybdmp_uat01//.dmp.s3’
with compression=100
go

Replication – Remove MSA/RepAgent

Remove MSA Database

1) Drop Subscription
2) Drop Rep Def
3) Stop Rep agent
4) Disable Rep Agent, sp_config_rep_agent , ‘disable’
5) Check that secondary truncation point is removed
6) Alter connection to set log transfer off

.

Remove a RepAgent

– First drop any subscriptions and then rep defs associated with the primary Database where you want to drop the RepAgent
You can check this with rs_helpsub and rs_helprep from the RSSD

– Drop any Subscriptions and RepDefs with e.g drop replication definition for a table rep def or equivalent commands for Database rep defs.

– Next disable the repagent from the Primary DB with sp_config_rep_agent sierra, “disable”

– Finally on the RepServer disable replication/log transfer from Primary DB with alter connection to SYBEMA_SIEPCE_UAT_DS02.sierra set log transfer off

Replication check repdef/subscription info

Run these in the relevant RSSD database for the Repserver (ID):
admin config, id_server – To find the Rep ID Server
admin rssd_name – To find RSSD database for RepServer

rs_helprep – Displays a list of table and function repdefs in current RSSD

rs_helpsub – Displays a list of all subscriptions to table and function repdefs in the current replicate RSSD.

rs_helprepdb – Displays list of all replicate databases with subscriptions to primary data in current Repserver

rs_helpdbrep – Displays all database repdefs in the current RSSD

rs_helpdbsub – Displays all subscriptions to database repdefs in current, replicate RSSD.

Tempdb full – recovery

If you have a situation where the tempdb is full then you can normally bind your login to the sa_tempdb or equivalent, here are the steps on how to do this:

– Do a select * from sysdatabases to see if there are any extra tempdb’s configured, if there aren’t any then you can easily create one as follows:
create temporary database to_tempdb on =’50M’ log on =’50M’

– Bind your elevated login to the extra tempdb with:
sp_tempdb ‘bind’, ‘LG’, zec3yit, ‘DB’, sa_tempdb, NULL, ‘soft’;

– Then log out and back in again and you should be able to do sp_who and other stored procs again.

– To try to clear the tempdb look for long running transactions and kill them:
select * from master..syslogshold

– You can also try to abort all transactions in log suspend on the tempdb with this command:
select lct_admin(“abort”, 0, 2)

Kill Logins proc/script

To Kill any logins before doing for example a failover use stored proc sp__kill_all:
e.g.
sp__kill_all
or
sp__kill_all ‘kill_m_all’ — which kills all the user logins on the Server.

If you just want to kill Logins for a particular database, if you are loading into it for example, then there is another stored proc sp__kill_db
e.g
sp__kill_db

.

create procedure dbo.sp__kill_all
/* Copyright (c) 2003 Rob Verschoor/Sypron B.V. */
/*
** Kills multiple ASE processes: either every process owned by a given
** login name, or (with the keyword 'kill_m_all') every user process on
** the server.  Requires sa_role.  Each spid is killed individually via
** exec-immediate; after a one-second wait, any spids that are still
** present are listed so the operator can re-check.
**
** Argument: @p_login - login name to kill, 'kill_m_all', or '?' for usage.
** Returns 0.
*/
@p_login varchar(30)
as
begin
    declare @p int, @cmd varchar(12), @suid int
    declare @kpid int, @spidcmd varchar(16), @status varchar(12)
    declare @login varchar(30), @xactyn varchar(3)
    declare @rc int, @err int
    set nocount on

    if proc_role("sa_role") = 0
    begin
        print "You need 'sa_role' to run this procedure."
        return 0
    end

    if @p_login = "?"
    begin
        print " "
        print "Usage: sp__kill_all { login_name | 'kill_m_all' }"
        print ""
        print "This procedure kills multiple ASE processes as specified:"
        print " "
        print "Arguments:"
        print "  login_name   - kills all processes for this login name"
        print "  'kill_m_all' - kills all processes"
        print ""
        print "Copyright (c) 2003 Rob Verschoor/Sypron B.V."
        print "Visit http://www.sypron.nl"
        print " "
        return 0
    end

    select @suid = NULL
    if @p_login != "kill_m_all"
    begin
        select @suid = suser_id(@p_login)
        -- use IS NULL: '= NULL' never matches under ANSI null semantics
        if @suid is NULL
        begin
            print "'%1!' is not a valid login name", @p_login
            return 0
        end
    end

    -- snapshot the target processes; NULL @suid means "all logins"
    select spid, kpid, status, cmd, suid, tran_name
    into #to_be_killed
    from master..sysprocesses
    where suid = isnull(@suid, suid)
    and suid != 0            -- skip system processes
    and spid != @@spid       -- never kill our own session
    select @rc = @@rowcount, @err = @@error

    if @p_login != "kill_m_all"
        print "Found %1! spids for login '%2!'", @rc, @p_login
    else
        print "Found %1! spids", @rc

    -- work on a copy so #to_be_killed stays intact for the final re-check
    select spid, kpid, status, cmd, suid, tran_name
    into #tmp_2bkilled
    from #to_be_killed

    while 1=1
    begin
        set rowcount 1 -- process row by row
        select @p = spid, @kpid = kpid, @spidcmd = rtrim(cmd),
               -- searched CASE: 'when NULL' in a simple CASE compares with '='
               -- and never matches a NULL tran_name
               @xactyn = case when tran_name is NULL then "no"
                              when tran_name = "" then "no"
                              else "yes" end,
               @status = rtrim(status), @login = suser_name(suid)
        from #tmp_2bkilled
        if @@rowcount = 0 break -- exit loop when ready

        delete #tmp_2bkilled where spid = @p
        set rowcount 0

        print "Killing spid %1! (login '%2!'; %3!,%4!; active Xact:%5!)", @p, @login, @status, @spidcmd, @xactyn
        select @cmd = "kill " + str(@p, 5)
        exec(@cmd)

        -- In pre-12.0, CIS can be used instead of exec-immediate. See page 60 in the book
        -- "Tips, Tricks & Recipes for Sybase ASE" (www.sypron.nl/ttr)
        -- exec sp_remotesql SYB_ALIAS, @cmd
    end

    set rowcount 0

    -- wait a sec and see who's still there
    waitfor delay "00:00:01"

    select t.spid, t.kpid, t.status, t.cmd, t.suid
    into #still_there
    from master..sysprocesses sp, #to_be_killed t
    where sp.spid = t.spid
    and sp.kpid = t.kpid
    select @rc = @@rowcount, @err = @@error

    if @rc != 0
    begin
        print ""
        print "%1! killed processes are still around:", @rc
        select spid, suser_name(suid) login, cmd, status
        from #still_there
        order by spid

        print " "
        print "Wait a while until these processes have disappeared..."
    end
end

go

.

.

create procedure dbo.sp__kill_db (@DATABASE varchar (30)) as
/*
** Kills every process currently active in the named database, except the
** caller's own spid.  Requires sa_role; refuses to run against master or
** from inside an open transaction.
**
** Argument: @DATABASE - name of the database whose processes are killed.
** Returns 0 on success, 1 on error.
*/
set ansinull on
set close on endtran on
set flushmessage on
set nocount on
set string_rtruncation on

if (@@trancount = 0)
begin
    set chained off

    if (@@isolation > 1)
    begin
        set transaction isolation level 1
    end
end
else
begin
    print ' sp__kill_db CANNOT BE RUN FROM WITHIN A TRANSACTION.'
    print ' '
    return 1
end

declare @cmdstring varchar (255),
        @errorchar varchar (  4),
        @id        int,
        @login     varchar ( 30),
        @spid      smallint,
        @spidchar  varchar ( 11)

print ' '

declare killdb_cursor cursor for
select spid, suser_name (suid)
from master.dbo.sysprocesses
where dbid = db_id (@DATABASE)
and dbid > 1
and spid != @@spid   -- comparison operator restored; never kill our own session
order by spid
for read only

if (charindex ('sa_role', show_role ()) > 0)
begin
    if (@DATABASE = 'master')
    begin
        print ' YOU CANNOT EXECUTE sp__kill_db FOR DATABASE master.'
        print ' '
        return 1
    end
    else if (db_id (@DATABASE) IS NULL)
    begin
        print ' DATABASE %1! DOES NOT EXIST.', @DATABASE
        print ' '
        return 1
    end
    else
    begin
        open killdb_cursor

        fetch killdb_cursor into @spid, @login

        -- @@sqlstatus = 2 means the fetch returned no row
        if (@@sqlstatus = 2)
        begin
            print ' THERE ARE NO ACTIVE PROCESSES FOR DATABASE %1!.', @DATABASE
        end
        else
        begin
            print ' SPIDS WILL BE KILLED FOR DATABASE %1!.', @DATABASE
            print ' '

            while (@@sqlstatus = 0)
            begin
                select @spidchar = convert (varchar (11), @spid)

                select @cmdstring = 'kill ' + @spidchar

                print ' KILLING SPID %1! FOR LOGIN %2!.',
                      @spidchar, @login

                execute (@cmdstring)

                fetch killdb_cursor into @spid, @login
            end

            print ' '

            if (@@sqlstatus = 2)
            begin
                -- execute sp_status @PARM=@DATABASE
                print ' ALL APPROPRIATE PROCESSES HAVE BEEN KILLED.'
            end
            else
            begin
                select @errorchar = convert (varchar (4), @@sqlstatus)

                print ' CURSOR ERROR. STATUS = %1!.', @errorchar

                return 1
            end
        end

        close killdb_cursor

        deallocate cursor killdb_cursor
    end
end
else
begin
    print ' ONLY A SYSTEM ADMINISTRATOR CAN EXECUTE sp__kill_db.'
    print ' '
end

go

Elevate a user’s permissions for a proc

If a user has a proc which needs to do for example a truncate within, then the user needs to have elevated rights, typically dbo owner, for it to work; if this is not possible then you need to use a wrapper stored proc to temporarily elevate the user's access while running the proc.

The steps are as follows:

– create role sii_role with passwd ‘********’

– grant role sa_role to sii_role

– grant role sii_role to — this gets added as inactive by default so needs to be set to on, i.e its safe

– Create the wrapper stored proc, e.g:
create procedure iSP_bcp_ts_gross_trade_audit_wrap
(
    @replay_mode char(1) = NULL  -- the same variables as in the stored proc being called
)
as
begin
/*
** Wrapper that temporarily elevates the caller via the password-protected
** sii_role (which has sa_role granted to it) so that the wrapped proc can
** run privileged operations such as truncate.  Call order is:
**     iSP_bcp_ts_gross_trade_audit_wrap
**         iSP_bcp_ts_gross_trade_audit
*/

    set nocount on

    -- enable sa_role; DBAs replace the placeholder with the real password and run sp_hidetext
    set role sii_role with passwd "********" on
    -- we have sa_role now, so run the original code
    exec iSP_bcp_ts_gross_trade_audit @replay_mode
    -- disable sa_role again before returning to the caller
    set role sii_role off

    return 0
end
GO

– sp_hidetext iSP_bcp_ts_gross_trade_audit_wrap

– grant execute on iSP_bcp_ts_gross_trade_audit_wrap to

– The user can then execute this new wrapper stored proc as they did the old stored proc and it should work fine.

Extract Users Permissions Script

#!/bin/ksh
# *******************************************************************************
#
# Filename:    Extract_Users_Permissions.sh
# ---------
#
# Description: This script uses ddlgen to extract all the users prior to a refresh.
# ------------ These users can then easily be re-added afterwards.
#
# Usage:       Extract_Users_Permissions.sh <SERVER> <DATABASE>
#
# *****************************************************************************/

# Source variables
SERVER=$1
DATABASE=$2
USERNAME=sa
PASSWORD=`/sybdba/sybutils/dba/utils/*******.ksh ${SERVER} ${USERNAME}`
INTERFACES="/home/sybase/interfaces"
COLLECTION_DATE=`date +"%Y%m%d_%H%M%S"`
BASEDIR=/tmp/USER_EXTRACT
TMPDIR="${BASEDIR}/tmp"
OUTPUTDIR="${BASEDIR}/output"
ALIASFILE="${SERVER}_${DATABASE}_${COLLECTION_DATE}_alias_list.tmp"
LOGFILE="${SERVER}_${DATABASE}_${COLLECTION_DATE}_logfile.out"
ISQL="/home/sybdba/ase_16.04-pl02/OCS-16_0/bin/isql -X"

# Check Usage
if [ $# != 2 ]
then
    echo "Usage : $0 <SERVER> <DATABASE>"
    exit 0
fi

# Check directories exist etc
if [ ! -d "${BASEDIR}" ]; then
    mkdir ${BASEDIR}
fi
if [ ! -d "${TMPDIR}" ]; then
    mkdir ${TMPDIR}
fi
if [ ! -d "${OUTPUTDIR}" ]; then
    mkdir ${OUTPUTDIR}
fi
if [ -f "${OUTPUTDIR}/${SERVER}_${DATABASE}" ]; then
    rm -f ${OUTPUTDIR}/${SERVER}_${DATABASE}
fi

# Generate sp_dropuser and sp_adduser statements
echo "----------------------------------------------------------" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "-- USERS SECTION" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "----------------------------------------------------------" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
# NOTE(review): the here-doc operator and redirections were garbled in the
# original paste ("</dev/null"); reconstructed as append-to-output with stderr
# discarded -- confirm against a working copy of the script.
${ISQL} -U${USERNAME} -P${PASSWORD} -w999 -S${SERVER} -D${DATABASE} -I${INTERFACES} << END >> ${OUTPUTDIR}/${SERVER}_${DATABASE} 2>/dev/null
set flushmessage on
set nocount on
go
select "sp_dropuser " + u.name + CHAR(10) + "GO"
from sysusers u, sysusers g
where u.gid *= g.uid
and u.uid between 4 and 16382
go
select "sp_adduser " + u.name + ", " + u.name + ", " + g.name + CHAR(10) + "GO"
from sysusers u, sysusers g
where u.gid *= g.uid
and u.uid between 4 and 16382
go
END

# Extract the aliases
# NOTE(review): output must land in the alias temp file that the loop below
# reads -- redirection reconstructed, confirm.
${ISQL} -U${USERNAME} -P${PASSWORD} -w999 -S${SERVER} -D${DATABASE} -I${INTERFACES} << END2 > ${TMPDIR}/${ALIASFILE} 2>/dev/null
set flushmessage on
set nocount on
go
select l.name from master..syslogins l, sysalternates a
where a.suid=l.suid
and a.altsuid=1
go
END2

# Run sql to extract User permissions
echo " "
echo "Extracting user permissions"
echo "----------------------------------------------------------" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "USERS PERMISSIONS" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
${ISQL} -U${USERNAME} -P${PASSWORD} -w999 -S${SERVER} -D${DATABASE} -I${INTERFACES} << END3 >> ${OUTPUTDIR}/${SERVER}_${DATABASE} 2>/dev/null
set flushmessage on
set nocount on
go
select case when a.protecttype =1 then "GRANT" end, " ",
case when a.action=151 then "references"
when a.action=193 then "select"
when a.action=195 then "insert"
when a.action=196 then "delete"
when a.action=197 then "update"
when a.action=224 then "execute"
when a.action=282 then "delete statistics"
when a.action=320 then "truncate table"
when a.action=326 then "update statistics"
when a.action=368 then "Transfer Table"
end, " on ", convert(varchar(50),object_name(a.id)), " to", user_name(a.uid) + CHAR(10) + 'GO'
from sysprotects a, sysobjects b, sysusers c
where a.id=b.id
and a.uid=c.uid
and b.type in ('U', 'P', 'V')
and c.uid between 4 and 16382
and c.suid >= 0
order by 3
go
END3

# Add in any Aliases
echo " "
echo " " >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "----------------------------------------------------------" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "-- ALIASES SECTION" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
echo "----------------------------------------------------------" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
for alias_user in `cat ${TMPDIR}/${ALIASFILE}`
do
    echo "Creating sp_addalias for Alias ${alias_user}"
    echo "sp_addalias ${alias_user}, dbo" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
    echo "go" >> ${OUTPUTDIR}/${SERVER}_${DATABASE}
    echo " "
    echo " "
done

#
#
# NOW RESTORE THE DATABASE
#
# Recreate the users and aliases by running in the ddlgen extracts from above
echo "Recreate the users and aliases by running the following and supplying the password:"
echo "isql -X -U${USERNAME} -w999 -S${SERVER} -D${DATABASE} -I${INTERFACES} -i${OUTPUTDIR}/${SERVER}_${DATABASE}"
echo " "